diff options
Diffstat (limited to 'contrib/llvm/tools/lld')
168 files changed, 51704 insertions, 0 deletions
diff --git a/contrib/llvm/tools/lld/.arcconfig b/contrib/llvm/tools/lld/.arcconfig new file mode 100644 index 000000000000..ebf4a4a6f8b7 --- /dev/null +++ b/contrib/llvm/tools/lld/.arcconfig @@ -0,0 +1,4 @@ +{ + "project_id" : "lld", + "conduit_uri" : "https://reviews.llvm.org/" +} diff --git a/contrib/llvm/tools/lld/.clang-format b/contrib/llvm/tools/lld/.clang-format new file mode 100644 index 000000000000..9b3aa8b7213b --- /dev/null +++ b/contrib/llvm/tools/lld/.clang-format @@ -0,0 +1 @@ +BasedOnStyle: LLVM diff --git a/contrib/llvm/tools/lld/.gitignore b/contrib/llvm/tools/lld/.gitignore new file mode 100644 index 000000000000..0a288ee8ce96 --- /dev/null +++ b/contrib/llvm/tools/lld/.gitignore @@ -0,0 +1,24 @@ +#==============================================================================# +# This file specifies intentionally untracked files that git should ignore. +# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html +#==============================================================================# + +#==============================================================================# +# File extensions to be ignored anywhere in the tree. +#==============================================================================# +# Temp files created by most text editors. +*~ +# Merge files created by git. +*.orig +# Byte compiled python modules. +*.pyc +# vim swap files +.*.swp +# Mac OS X Finder layout info +.DS_Store + +#==============================================================================# +# Directories to be ignored. +#==============================================================================# +# Sphinx build files. +docs/_build diff --git a/contrib/llvm/tools/lld/CMakeLists.txt b/contrib/llvm/tools/lld/CMakeLists.txt new file mode 100644 index 000000000000..23cef2e9fc67 --- /dev/null +++ b/contrib/llvm/tools/lld/CMakeLists.txt @@ -0,0 +1,155 @@ +# Check if lld is built as a standalone project. 
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + project(lld) + cmake_minimum_required(VERSION 3.4.3) + + set(CMAKE_INCLUDE_CURRENT_DIR ON) + set(LLD_BUILT_STANDALONE TRUE) + + find_program(LLVM_CONFIG_PATH "llvm-config" DOC "Path to llvm-config binary") + if(NOT LLVM_CONFIG_PATH) + message(FATAL_ERROR "llvm-config not found: specify LLVM_CONFIG_PATH") + endif() + + execute_process(COMMAND "${LLVM_CONFIG_PATH}" "--obj-root" "--includedir" + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LLVM_CONFIG_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(HAD_ERROR) + message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") + endif() + + string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" LLVM_CONFIG_OUTPUT "${LLVM_CONFIG_OUTPUT}") + + list(GET LLVM_CONFIG_OUTPUT 0 OBJ_ROOT) + list(GET LLVM_CONFIG_OUTPUT 1 MAIN_INCLUDE_DIR) + + set(LLVM_OBJ_ROOT ${OBJ_ROOT} CACHE PATH "path to LLVM build tree") + set(LLVM_MAIN_INCLUDE_DIR ${MAIN_INCLUDE_DIR} CACHE PATH "path to llvm/include") + + file(TO_CMAKE_PATH ${LLVM_OBJ_ROOT} LLVM_BINARY_DIR) + set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm") + + if(NOT EXISTS "${LLVM_CMAKE_PATH}/LLVMConfig.cmake") + message(FATAL_ERROR "LLVMConfig.cmake not found") + endif() + include("${LLVM_CMAKE_PATH}/LLVMConfig.cmake") + + list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}") + + set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}") + include_directories("${LLVM_BINARY_DIR}/include" ${LLVM_INCLUDE_DIRS}) + link_directories(${LLVM_LIBRARY_DIRS}) + + set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) + find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) + + include(AddLLVM) + include(TableGen) + include(HandleLLVMOptions) +endif() + +set(LLD_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(LLD_INCLUDE_DIR ${LLD_SOURCE_DIR}/include ) +set(LLD_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +# Compute the LLD version from the LLVM version. 
+string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" LLD_VERSION + ${PACKAGE_VERSION}) +message(STATUS "LLD version: ${LLD_VERSION}") + +string(REGEX REPLACE "([0-9]+)\\.[0-9]+(\\.[0-9]+)?" "\\1" LLD_VERSION_MAJOR + ${LLD_VERSION}) +string(REGEX REPLACE "[0-9]+\\.([0-9]+)(\\.[0-9]+)?" "\\1" LLD_VERSION_MINOR + ${LLD_VERSION}) + +# Determine LLD revision and repository. +# TODO: Figure out a way to get the revision and the repository on windows. +if ( NOT CMAKE_SYSTEM_NAME MATCHES "Windows" ) + execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetSourceVersion ${LLD_SOURCE_DIR} + OUTPUT_VARIABLE LLD_REVISION) + + execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetRepositoryPath ${LLD_SOURCE_DIR} + OUTPUT_VARIABLE LLD_REPOSITORY) + if ( LLD_REPOSITORY ) + # Replace newline characters with spaces + string(REGEX REPLACE "(\r?\n)+" " " LLD_REPOSITORY ${LLD_REPOSITORY}) + # Remove leading spaces + STRING(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REPOSITORY "${LLD_REPOSITORY}" ) + # Remove trailing spaces + string(REGEX REPLACE "(\ )+$" "" LLD_REPOSITORY ${LLD_REPOSITORY}) + endif() + + if ( LLD_REVISION ) + # Replace newline characters with spaces + string(REGEX REPLACE "(\r?\n)+" " " LLD_REVISION ${LLD_REVISION}) + # Remove leading spaces + STRING(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REVISION "${LLD_REVISION}" ) + # Remove trailing spaces + string(REGEX REPLACE "(\ )+$" "" LLD_REVISION ${LLD_REVISION}) + endif() +endif () + +# Configure the Version.inc file. +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/include/lld/Config/Version.inc.in + ${CMAKE_CURRENT_BINARY_DIR}/include/lld/Config/Version.inc) + + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) + message(FATAL_ERROR "In-source builds are not allowed. CMake would overwrite " +"the makefiles distributed with LLVM. Please create a directory and run cmake " +"from there, passing the path to this source directory as the last argument. " +"This process created the file `CMakeCache.txt' and the directory " +"`CMakeFiles'. 
Please delete them.") +endif() + +list (APPEND CMAKE_MODULE_PATH "${LLD_SOURCE_DIR}/cmake/modules") + +include(AddLLD) + +option(LLD_USE_VTUNE + "Enable VTune user task tracking." + OFF) +if (LLD_USE_VTUNE) + find_package(VTune) + if (VTUNE_FOUND) + include_directories(${VTune_INCLUDE_DIRS}) + list(APPEND LLVM_COMMON_LIBS ${VTune_LIBRARIES}) + add_definitions(-DLLD_HAS_VTUNE) + endif() +endif() + +option(LLD_BUILD_TOOLS + "Build the lld tools. If OFF, just generate build targets." ON) + +if (MSVC) + add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.' + add_definitions(-wd4062) # Suppress 'warning C4062: enumerator X in switch of enum Y is not handled' from system header. +endif() + +include_directories(BEFORE + ${CMAKE_CURRENT_BINARY_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/include + ) + +if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + install(DIRECTORY include/ + DESTINATION include + FILES_MATCHING + PATTERN "*.h" + PATTERN ".svn" EXCLUDE + ) +endif() + +add_subdirectory(lib) +add_subdirectory(tools/lld) + +if (LLVM_INCLUDE_TESTS) + add_subdirectory(test) + add_subdirectory(unittests) +endif() + +add_subdirectory(docs) +add_subdirectory(COFF) +add_subdirectory(ELF) diff --git a/contrib/llvm/tools/lld/CODE_OWNERS.TXT b/contrib/llvm/tools/lld/CODE_OWNERS.TXT new file mode 100644 index 000000000000..292967e588f0 --- /dev/null +++ b/contrib/llvm/tools/lld/CODE_OWNERS.TXT @@ -0,0 +1,19 @@ +This file is a list of the people responsible for ensuring that patches for a +particular part of LLD are reviewed, either by themself or by someone else. +They are also the gatekeepers for their part of LLD, with the final word on +what goes in or not. + +The list is sorted by surname and formatted to allow easy grepping and +beautification by scripts. The fields are: name (N), email (E), web-address +(W), PGP key ID and fingerprint (P), description (D), and snail-mail address +(S). 
Each entry should contain at least the (N), (E) and (D) fields. + + +N: Rui Ueyama +E: ruiu@google.com +D: COFF, ELF backends (COFF/* ELF/*) + +N: Lang Hames, Nick Kledzik +E: lhames@gmail.com, kledzik@apple.com +D: Mach-O backend + diff --git a/contrib/llvm/tools/lld/COFF/CMakeLists.txt b/contrib/llvm/tools/lld/COFF/CMakeLists.txt new file mode 100644 index 000000000000..70a33b9fdd81 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/CMakeLists.txt @@ -0,0 +1,48 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(COFFOptionsTableGen) + +if(NOT LLD_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + +add_lld_library(lldCOFF + Chunks.cpp + DLL.cpp + Driver.cpp + DriverUtils.cpp + Error.cpp + ICF.cpp + InputFiles.cpp + Librarian.cpp + MarkLive.cpp + ModuleDef.cpp + PDB.cpp + Strings.cpp + SymbolTable.cpp + Symbols.cpp + Writer.cpp + + LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Core + DebugInfoCodeView + DebugInfoMSF + DebugInfoPDB + LTO + LibDriver + Object + MC + MCDisassembler + Target + Option + Support + + LINK_LIBS + lldCore + ${PTHREAD_LIB} + + DEPENDS + COFFOptionsTableGen + ${tablegen_deps} + ) diff --git a/contrib/llvm/tools/lld/COFF/Chunks.cpp b/contrib/llvm/tools/lld/COFF/Chunks.cpp new file mode 100644 index 000000000000..7f0dfa92ec10 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Chunks.cpp @@ -0,0 +1,353 @@ +//===- Chunks.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "Error.h" +#include "InputFiles.h" +#include "Symbols.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::COFF; +using llvm::support::ulittle32_t; + +namespace lld { +namespace coff { + +SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H) + : Chunk(SectionKind), Repl(this), Header(H), File(F), + Relocs(File->getCOFFObj()->getRelocations(Header)), + NumRelocs(std::distance(Relocs.begin(), Relocs.end())) { + // Initialize SectionName. + File->getCOFFObj()->getSectionName(Header, SectionName); + + Align = Header->getAlignment(); + + // Only COMDAT sections are subject of dead-stripping. + Live = !isCOMDAT(); +} + +static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); } +static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); } +static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); } +static void or16(uint8_t *P, uint16_t V) { write16le(P, read16le(P) | V); } + +void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym, + uint64_t P) const { + uint64_t S = Sym->getRVA(); + switch (Type) { + case IMAGE_REL_AMD64_ADDR32: add32(Off, S + Config->ImageBase); break; + case IMAGE_REL_AMD64_ADDR64: add64(Off, S + Config->ImageBase); break; + case IMAGE_REL_AMD64_ADDR32NB: add32(Off, S); break; + case IMAGE_REL_AMD64_REL32: add32(Off, S - P - 4); break; + case IMAGE_REL_AMD64_REL32_1: add32(Off, S - P - 5); break; + case IMAGE_REL_AMD64_REL32_2: add32(Off, S - P - 6); break; + case IMAGE_REL_AMD64_REL32_3: add32(Off, S - P - 7); break; + case IMAGE_REL_AMD64_REL32_4: add32(Off, S - P - 8); break; + case 
IMAGE_REL_AMD64_REL32_5: add32(Off, S - P - 9); break; + case IMAGE_REL_AMD64_SECTION: add16(Off, Sym->getSectionIndex()); break; + case IMAGE_REL_AMD64_SECREL: add32(Off, Sym->getSecrel()); break; + default: + fatal("unsupported relocation type"); + } +} + +void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym, + uint64_t P) const { + uint64_t S = Sym->getRVA(); + switch (Type) { + case IMAGE_REL_I386_ABSOLUTE: break; + case IMAGE_REL_I386_DIR32: add32(Off, S + Config->ImageBase); break; + case IMAGE_REL_I386_DIR32NB: add32(Off, S); break; + case IMAGE_REL_I386_REL32: add32(Off, S - P - 4); break; + case IMAGE_REL_I386_SECTION: add16(Off, Sym->getSectionIndex()); break; + case IMAGE_REL_I386_SECREL: add32(Off, Sym->getSecrel()); break; + default: + fatal("unsupported relocation type"); + } +} + +static void applyMOV(uint8_t *Off, uint16_t V) { + write16le(Off, (read16le(Off) & 0xfbf0) | ((V & 0x800) >> 1) | ((V >> 12) & 0xf)); + write16le(Off + 2, (read16le(Off + 2) & 0x8f00) | ((V & 0x700) << 4) | (V & 0xff)); +} + +static uint16_t readMOV(uint8_t *Off) { + uint16_t Opcode1 = read16le(Off); + uint16_t Opcode2 = read16le(Off + 2); + uint16_t Imm = (Opcode2 & 0x00ff) | ((Opcode2 >> 4) & 0x0700); + Imm |= ((Opcode1 << 1) & 0x0800) | ((Opcode1 & 0x000f) << 12); + return Imm; +} + +static void applyMOV32T(uint8_t *Off, uint32_t V) { + uint16_t ImmW = readMOV(Off); // read MOVW operand + uint16_t ImmT = readMOV(Off + 4); // read MOVT operand + uint32_t Imm = ImmW | (ImmT << 16); + V += Imm; // add the immediate offset + applyMOV(Off, V); // set MOVW operand + applyMOV(Off + 4, V >> 16); // set MOVT operand +} + +// Patches the branch displacement V into the two 16-bit halfwords at Off. +// Used for IMAGE_REL_ARM_BRANCH20T relocations. +static void applyBranch20T(uint8_t *Off, int32_t V) { + // Only a sign bit plus bits 1..19 of V are encoded below, i.e. a signed + // 21-bit byte offset. Reject anything wider instead of silently emitting a + // branch to a truncated target (applyBranch24T does the same for 25 bits). + if (!isInt<21>(V)) + fatal("relocation out of range"); + uint32_t S = V < 0 ? 
1 : 0; + uint32_t J1 = (V >> 19) & 1; + uint32_t J2 = (V >> 18) & 1; + or16(Off, (S << 10) | ((V >> 12) & 0x3f)); + or16(Off + 2, (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); +} + +static void applyBranch24T(uint8_t *Off, int32_t V) { + if (!isInt<25>(V)) + fatal("relocation out of range"); + uint32_t S = V < 0 ? 1 : 0; + uint32_t J1 = ((~V >> 23) & 1) ^ S; + uint32_t J2 = ((~V >> 22) & 1) ^ S; + or16(Off, (S << 10) | ((V >> 12) & 0x3ff)); + // Clear out the J1 and J2 bits which may be set. + write16le(Off + 2, (read16le(Off + 2) & 0xd000) | (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); +} + +void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, + uint64_t P) const { + uint64_t S = Sym->getRVA(); + // Pointer to thumb code must have the LSB set. + if (Sym->isExecutable()) + S |= 1; + switch (Type) { + case IMAGE_REL_ARM_ADDR32: add32(Off, S + Config->ImageBase); break; + case IMAGE_REL_ARM_ADDR32NB: add32(Off, S); break; + case IMAGE_REL_ARM_MOV32T: applyMOV32T(Off, S + Config->ImageBase); break; + case IMAGE_REL_ARM_BRANCH20T: applyBranch20T(Off, S - P - 4); break; + case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(Off, S - P - 4); break; + case IMAGE_REL_ARM_BLX23T: applyBranch24T(Off, S - P - 4); break; + case IMAGE_REL_ARM_SECREL: add32(Off, Sym->getSecrel()); break; + default: + fatal("unsupported relocation type"); + } +} + +void SectionChunk::writeTo(uint8_t *Buf) const { + if (!hasData()) + return; + // Copy section contents from source object file to output file. + ArrayRef<uint8_t> A = getContents(); + memcpy(Buf + OutputSectionOff, A.data(), A.size()); + + // Apply relocations. 
+ for (const coff_relocation &Rel : Relocs) { + uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress; + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); + Defined *Sym = cast<Defined>(Body); + uint64_t P = RVA + Rel.VirtualAddress; + switch (Config->Machine) { + case AMD64: + applyRelX64(Off, Rel.Type, Sym, P); + break; + case I386: + applyRelX86(Off, Rel.Type, Sym, P); + break; + case ARMNT: + applyRelARM(Off, Rel.Type, Sym, P); + break; + default: + llvm_unreachable("unknown machine type"); + } + } +} + +void SectionChunk::addAssociative(SectionChunk *Child) { + AssocChildren.push_back(Child); +} + +static uint8_t getBaserelType(const coff_relocation &Rel) { + switch (Config->Machine) { + case AMD64: + if (Rel.Type == IMAGE_REL_AMD64_ADDR64) + return IMAGE_REL_BASED_DIR64; + return IMAGE_REL_BASED_ABSOLUTE; + case I386: + if (Rel.Type == IMAGE_REL_I386_DIR32) + return IMAGE_REL_BASED_HIGHLOW; + return IMAGE_REL_BASED_ABSOLUTE; + case ARMNT: + if (Rel.Type == IMAGE_REL_ARM_ADDR32) + return IMAGE_REL_BASED_HIGHLOW; + if (Rel.Type == IMAGE_REL_ARM_MOV32T) + return IMAGE_REL_BASED_ARM_MOV32T; + return IMAGE_REL_BASED_ABSOLUTE; + default: + llvm_unreachable("unknown machine type"); + } +} + +// Windows-specific. +// Collect all locations that contain absolute addresses, which need to be +// fixed by the loader if load-time relocation is needed. +// Only called when base relocation is enabled. 
+void SectionChunk::getBaserels(std::vector<Baserel> *Res) { + for (const coff_relocation &Rel : Relocs) { + uint8_t Ty = getBaserelType(Rel); + if (Ty == IMAGE_REL_BASED_ABSOLUTE) + continue; + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); + if (isa<DefinedAbsolute>(Body)) + continue; + Res->emplace_back(RVA + Rel.VirtualAddress, Ty); + } +} + +bool SectionChunk::hasData() const { + return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA); +} + +uint32_t SectionChunk::getPermissions() const { + return Header->Characteristics & PermMask; +} + +bool SectionChunk::isCOMDAT() const { + return Header->Characteristics & IMAGE_SCN_LNK_COMDAT; +} + +void SectionChunk::printDiscardedMessage() const { + // Removed by dead-stripping. If it's removed by ICF, ICF already + // printed out the name, so don't repeat that here. + if (Sym && this == Repl) + outs() << "Discarded " << Sym->getName() << "\n"; +} + +StringRef SectionChunk::getDebugName() { + if (Sym) + return Sym->getName(); + return ""; +} + +ArrayRef<uint8_t> SectionChunk::getContents() const { + ArrayRef<uint8_t> A; + File->getCOFFObj()->getSectionContents(Header, A); + return A; +} + +void SectionChunk::replace(SectionChunk *Other) { + Other->Repl = Repl; + Other->Live = false; +} + +CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) { + // Common symbols are aligned on natural boundaries up to 32 bytes. + // This is what MSVC link.exe does. + Align = std::min(uint64_t(32), PowerOf2Ceil(Sym.getValue())); +} + +uint32_t CommonChunk::getPermissions() const { + return IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE; +} + +void StringChunk::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, Str.data(), Str.size()); +} + +ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) { + // Intel Optimization Manual says that all branch targets + // should be 16-byte aligned. MSVC linker does this too. 
+ Align = 16; +} + +void ImportThunkChunkX64::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, ImportThunkX86, sizeof(ImportThunkX86)); + // The first two bytes is a JMP instruction. Fill its operand. + write32le(Buf + OutputSectionOff + 2, ImpSymbol->getRVA() - RVA - getSize()); +} + +void ImportThunkChunkX86::getBaserels(std::vector<Baserel> *Res) { + Res->emplace_back(getRVA() + 2); +} + +void ImportThunkChunkX86::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, ImportThunkX86, sizeof(ImportThunkX86)); + // The first two bytes is a JMP instruction. Fill its operand. + write32le(Buf + OutputSectionOff + 2, + ImpSymbol->getRVA() + Config->ImageBase); +} + +void ImportThunkChunkARM::getBaserels(std::vector<Baserel> *Res) { + Res->emplace_back(getRVA(), IMAGE_REL_BASED_ARM_MOV32T); +} + +void ImportThunkChunkARM::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, ImportThunkARM, sizeof(ImportThunkARM)); + // Fix mov.w and mov.t operands. + applyMOV32T(Buf + OutputSectionOff, ImpSymbol->getRVA() + Config->ImageBase); +} + +void LocalImportChunk::getBaserels(std::vector<Baserel> *Res) { + Res->emplace_back(getRVA()); +} + +size_t LocalImportChunk::getSize() const { + return Config->is64() ? 8 : 4; +} + +void LocalImportChunk::writeTo(uint8_t *Buf) const { + if (Config->is64()) { + write64le(Buf + OutputSectionOff, Sym->getRVA() + Config->ImageBase); + } else { + write32le(Buf + OutputSectionOff, Sym->getRVA() + Config->ImageBase); + } +} + +void SEHTableChunk::writeTo(uint8_t *Buf) const { + ulittle32_t *Begin = reinterpret_cast<ulittle32_t *>(Buf + OutputSectionOff); + size_t Cnt = 0; + for (Defined *D : Syms) + Begin[Cnt++] = D->getRVA(); + std::sort(Begin, Begin + Cnt); +} + +// Windows-specific. +// This class represents a block in .reloc section. +BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) { + // Block header consists of 4 byte page RVA and 4 byte block size. + // Each entry is 2 byte. 
Last entry may be padding. + Data.resize(alignTo((End - Begin) * 2 + 8, 4)); + uint8_t *P = Data.data(); + write32le(P, Page); + write32le(P + 4, Data.size()); + P += 8; + for (Baserel *I = Begin; I != End; ++I) { + write16le(P, (I->Type << 12) | (I->RVA - Page)); + P += 2; + } +} + +void BaserelChunk::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, Data.data(), Data.size()); +} + +// Returns the base relocation type used when a Baserel is created from an RVA +// alone (see the one-argument Baserel constructor). LocalImportChunk relies on +// this for every machine and writes a 4-byte address whenever the target is +// not AMD64, so ARMNT shares the I386 answer rather than falling through to +// llvm_unreachable. +uint8_t Baserel::getDefaultType() { + switch (Config->Machine) { + case AMD64: + return IMAGE_REL_BASED_DIR64; + case I386: + case ARMNT: + return IMAGE_REL_BASED_HIGHLOW; + default: + llvm_unreachable("unknown machine type"); + } +} + +} // namespace coff +} // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Chunks.h b/contrib/llvm/tools/lld/COFF/Chunks.h new file mode 100644 index 000000000000..59e36b84c9b0 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Chunks.h @@ -0,0 +1,331 @@ +//===- Chunks.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_CHUNKS_H +#define LLD_COFF_CHUNKS_H + +#include "Config.h" +#include "InputFiles.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Object/COFF.h" +#include <utility> +#include <vector> + +namespace lld { +namespace coff { + +using llvm::COFF::ImportDirectoryTableEntry; +using llvm::object::COFFSymbolRef; +using llvm::object::SectionRef; +using llvm::object::coff_relocation; +using llvm::object::coff_section; + +class Baserel; +class Defined; +class DefinedImportData; +class DefinedRegular; +class ObjectFile; +class OutputSection; +class SymbolBody; + +// Mask for section types (code, data, bss, discardable, etc.) +// and permissions (writable, readable or executable). 
+const uint32_t PermMask = 0xFF0000F0; + +// A Chunk represents a chunk of data that will occupy space in the +// output (if the resolver chose that). It may or may not be backed by +// a section of an input file. It could be linker-created data, or +// doesn't even have actual data (if common or bss). +class Chunk { +public: + enum Kind { SectionKind, OtherKind }; + Kind kind() const { return ChunkKind; } + virtual ~Chunk() = default; + + // Returns the size of this chunk (even if this is a common or BSS.) + virtual size_t getSize() const = 0; + + // Write this chunk to a mmap'ed file, assuming Buf is pointing to + // beginning of the file. Because this function may use RVA values + // of other chunks for relocations, you need to set them properly + // before calling this function. + virtual void writeTo(uint8_t *Buf) const {} + + // The writer sets and uses the addresses. + uint64_t getRVA() const { return RVA; } + uint32_t getAlign() const { return Align; } + void setRVA(uint64_t V) { RVA = V; } + void setOutputSectionOff(uint64_t V) { OutputSectionOff = V; } + + // Returns true if this has non-zero data. BSS chunks return + // false. If false is returned, the space occupied by this chunk + // will be filled with zeros. + virtual bool hasData() const { return true; } + + // Returns readable/writable/executable bits. + virtual uint32_t getPermissions() const { return 0; } + + // Returns the section name if this is a section chunk. + // It is illegal to call this function on non-section chunks. + virtual StringRef getSectionName() const { + llvm_unreachable("unimplemented getSectionName"); + } + + // An output section has pointers to chunks in the section, and each + // chunk has a back pointer to an output section. + void setOutputSection(OutputSection *O) { Out = O; } + OutputSection *getOutputSection() { return Out; } + + // Windows-specific. + // Collect all locations that contain absolute addresses for base relocations. 
+ virtual void getBaserels(std::vector<Baserel> *Res) {} + + // Returns a human-readable name of this chunk. Chunks are unnamed chunks of + // bytes, so this is used only for logging or debugging. + virtual StringRef getDebugName() { return ""; } + +protected: + Chunk(Kind K = OtherKind) : ChunkKind(K) {} + const Kind ChunkKind; + + // The RVA of this chunk in the output. The writer sets a value. + uint64_t RVA = 0; + + // The offset from beginning of the output section. The writer sets a value. + uint64_t OutputSectionOff = 0; + + // The output section for this chunk. + OutputSection *Out = nullptr; + + // The alignment of this chunk. The writer uses the value. + uint32_t Align = 1; +}; + +// A chunk corresponding a section of an input file. +class SectionChunk : public Chunk { + // Identical COMDAT Folding feature accesses section internal data. + friend class ICF; + +public: + class symbol_iterator : public llvm::iterator_adaptor_base< + symbol_iterator, const coff_relocation *, + std::random_access_iterator_tag, SymbolBody *> { + friend SectionChunk; + + ObjectFile *File; + + symbol_iterator(ObjectFile *File, const coff_relocation *I) + : symbol_iterator::iterator_adaptor_base(I), File(File) {} + + public: + symbol_iterator() = default; + + SymbolBody *operator*() const { + return File->getSymbolBody(I->SymbolTableIndex); + } + }; + + SectionChunk(ObjectFile *File, const coff_section *Header); + static bool classof(const Chunk *C) { return C->kind() == SectionKind; } + size_t getSize() const override { return Header->SizeOfRawData; } + ArrayRef<uint8_t> getContents() const; + void writeTo(uint8_t *Buf) const override; + bool hasData() const override; + uint32_t getPermissions() const override; + StringRef getSectionName() const override { return SectionName; } + void getBaserels(std::vector<Baserel> *Res) override; + bool isCOMDAT() const; + void applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const; + void applyRelX86(uint8_t *Off, uint16_t 
Type, Defined *Sym, uint64_t P) const; + void applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const; + + // Called if the garbage collector decides to not include this chunk + // in a final output. It's supposed to print out a log message to stdout. + void printDiscardedMessage() const; + + // Adds COMDAT associative sections to this COMDAT section. A chunk + // and its children are treated as a group by the garbage collector. + void addAssociative(SectionChunk *Child); + + StringRef getDebugName() override; + void setSymbol(DefinedRegular *S) { if (!Sym) Sym = S; } + + // Used by the garbage collector. + bool isLive() { return !Config->DoGC || Live; } + void markLive() { + assert(!isLive() && "Cannot mark an already live section!"); + Live = true; + } + + // Allow iteration over the bodies of this chunk's relocated symbols. + llvm::iterator_range<symbol_iterator> symbols() const { + return llvm::make_range(symbol_iterator(File, Relocs.begin()), + symbol_iterator(File, Relocs.end())); + } + + // Allow iteration over the associated child chunks for this section. + ArrayRef<SectionChunk *> children() const { return AssocChildren; } + + // A pointer pointing to a replacement for this chunk. + // Initially it points to "this" object. If this chunk is merged + // with another chunk by ICF, it points to another chunk, + // and this chunk is considered as dead. + SectionChunk *Repl; + + // The CRC of the contents as described in the COFF spec 4.5.5. + // Auxiliary Format 5: Section Definitions. Used for ICF. + uint32_t Checksum = 0; + + const coff_section *Header; + +private: + // A file this chunk was created from. + ObjectFile *File; + + StringRef SectionName; + std::vector<SectionChunk *> AssocChildren; + llvm::iterator_range<const coff_relocation *> Relocs; + size_t NumRelocs; + + // Used by the garbage collector. 
+ bool Live; + + // Used for ICF (Identical COMDAT Folding) + void replace(SectionChunk *Other); + uint32_t Color[2] = {0, 0}; + + // Sym points to a section symbol if this is a COMDAT chunk. + DefinedRegular *Sym = nullptr; +}; + +// A chunk for common symbols. Common chunks don't have actual data. +// The chunk size is the symbol's value (Sym.getValue()), and the chunk +// reports ".bss" as its section name. +class CommonChunk : public Chunk { +public: + CommonChunk(const COFFSymbolRef Sym); + size_t getSize() const override { return Sym.getValue(); } + bool hasData() const override { return false; } + uint32_t getPermissions() const override; + StringRef getSectionName() const override { return ".bss"; } + +private: + const COFFSymbolRef Sym; +}; + +// A chunk for linker-created strings. +// The chunk only holds a StringRef, i.e. it does not own the characters. +class StringChunk : public Chunk { +public: + explicit StringChunk(StringRef S) : Str(S) {} + // One byte larger than the string itself. writeTo() copies only Str.size() + // bytes, so the extra byte is presumably the NUL terminator, left as zero + // by the output buffer -- TODO confirm against the writer. + size_t getSize() const override { return Str.size() + 1; } + void writeTo(uint8_t *Buf) const override; + +private: + StringRef Str; +}; + +// Instruction template shared by the x86 and x64 import thunks. The four +// zero bytes after the two-byte opcode are the operand, which the thunk +// chunks' writeTo() overwrites. +static const uint8_t ImportThunkX86[] = { + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0 +}; + +// Instruction template for the ARM import thunk. The mov.w/mov.t immediates +// are patched by ImportThunkChunkARM::writeTo(). +static const uint8_t ImportThunkARM[] = { + 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0 + 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0 + 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip] +}; + +// Windows-specific. +// A chunk for DLL import jump table entry. In a final output, its +// contents will be a JMP instruction to some __imp_ symbol. 
+class ImportThunkChunkX64 : public Chunk { +public: + explicit ImportThunkChunkX64(Defined *S); + size_t getSize() const override { return sizeof(ImportThunkX86); } + void writeTo(uint8_t *Buf) const override; + +private: + Defined *ImpSymbol; +}; + +class ImportThunkChunkX86 : public Chunk { +public: + explicit ImportThunkChunkX86(Defined *S) : ImpSymbol(S) {} + size_t getSize() const override { return sizeof(ImportThunkX86); } + void getBaserels(std::vector<Baserel> *Res) override; + void writeTo(uint8_t *Buf) const override; + +private: + Defined *ImpSymbol; +}; + +class ImportThunkChunkARM : public Chunk { +public: + explicit ImportThunkChunkARM(Defined *S) : ImpSymbol(S) {} + size_t getSize() const override { return sizeof(ImportThunkARM); } + void getBaserels(std::vector<Baserel> *Res) override; + void writeTo(uint8_t *Buf) const override; + +private: + Defined *ImpSymbol; +}; + +// Windows-specific. +// See comments for DefinedLocalImport class. +class LocalImportChunk : public Chunk { +public: + explicit LocalImportChunk(Defined *S) : Sym(S) {} + size_t getSize() const override; + void getBaserels(std::vector<Baserel> *Res) override; + void writeTo(uint8_t *Buf) const override; + +private: + Defined *Sym; +}; + +// Windows-specific. +// A chunk for SEH table which contains RVAs of safe exception handler +// functions. x86-only. +class SEHTableChunk : public Chunk { +public: + explicit SEHTableChunk(std::set<Defined *> S) : Syms(std::move(S)) {} + size_t getSize() const override { return Syms.size() * 4; } + void writeTo(uint8_t *Buf) const override; + +private: + std::set<Defined *> Syms; +}; + +// Windows-specific. +// This class represents a block in .reloc section. +// See the PE/COFF spec 5.6 for details. 
+class BaserelChunk : public Chunk { +public: + BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End); + size_t getSize() const override { return Data.size(); } + void writeTo(uint8_t *Buf) const override; + +private: + std::vector<uint8_t> Data; +}; + +class Baserel { +public: + Baserel(uint32_t V, uint8_t Ty) : RVA(V), Type(Ty) {} + explicit Baserel(uint32_t V) : Baserel(V, getDefaultType()) {} + uint8_t getDefaultType(); + + uint32_t RVA; + uint8_t Type; +}; + +} // namespace coff +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/COFF/Config.h b/contrib/llvm/tools/lld/COFF/Config.h new file mode 100644 index 000000000000..0fa3338aa28c --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Config.h @@ -0,0 +1,165 @@ +//===- Config.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_CONFIG_H +#define LLD_COFF_CONFIG_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/COFF.h" +#include <cstdint> +#include <map> +#include <set> +#include <string> + +namespace lld { +namespace coff { + +using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; +using llvm::COFF::WindowsSubsystem; +using llvm::StringRef; +class DefinedAbsolute; +class DefinedRelative; +class StringChunk; +struct Symbol; +class SymbolBody; + +// Short aliases. +static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; +static const auto ARMNT = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT; +static const auto I386 = llvm::COFF::IMAGE_FILE_MACHINE_I386; + +// Represents an /export option. 
+struct Export { + StringRef Name; // N in /export:N or /export:E=N + StringRef ExtName; // E in /export:E=N + SymbolBody *Sym = nullptr; + uint16_t Ordinal = 0; + bool Noname = false; + bool Data = false; + bool Private = false; + + // If an export is a form of /export:foo=dllname.bar, that means + // that foo should be exported as an alias to bar in the DLL. + // ForwardTo is set to "dllname.bar" part. Usually empty. + StringRef ForwardTo; + StringChunk *ForwardChunk = nullptr; + + // True if this /export option was in .drectves section. + bool Directives = false; + StringRef SymbolName; + StringRef ExportName; // Name in DLL + + bool operator==(const Export &E) { + return (Name == E.Name && ExtName == E.ExtName && + Ordinal == E.Ordinal && Noname == E.Noname && + Data == E.Data && Private == E.Private); + } +}; + +enum class DebugType { + None = 0x0, + CV = 0x1, /// CodeView + PData = 0x2, /// Procedure Data + Fixup = 0x4, /// Relocation Table +}; + +// Global configuration. +struct Configuration { + enum ManifestKind { SideBySide, Embed, No }; + bool is64() { return Machine == AMD64; } + + llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN; + bool Verbose = false; + WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; + SymbolBody *Entry = nullptr; + bool NoEntry = false; + std::string OutputFile; + bool DoGC = true; + bool DoICF = true; + bool Relocatable = true; + bool Force = false; + bool Debug = false; + bool WriteSymtab = true; + unsigned DebugTypes = static_cast<unsigned>(DebugType::None); + StringRef PDBPath; + + // Symbols in this set are considered as live by the garbage collector. + std::set<SymbolBody *> GCRoot; + + std::set<StringRef> NoDefaultLibs; + bool NoDefaultLibAll = false; + + // True if we are creating a DLL. 
+ bool DLL = false; + StringRef Implib; + std::vector<Export> Exports; + std::set<std::string> DelayLoads; + std::map<std::string, int> DLLOrder; + SymbolBody *DelayLoadHelper = nullptr; + + // Used for SafeSEH. + Symbol *SEHTable = nullptr; + Symbol *SEHCount = nullptr; + + // Used for /opt:lldlto=N + unsigned LTOOptLevel = 2; + + // Used for /opt:lldltojobs=N + unsigned LTOJobs = 1; + + // Used for /merge:from=to (e.g. /merge:.rdata=.text) + std::map<StringRef, StringRef> Merge; + + // Used for /section=.name,{DEKPRSW} to set section attributes. + std::map<StringRef, uint32_t> Section; + + // Options for manifest files. + ManifestKind Manifest = SideBySide; + int ManifestID = 1; + StringRef ManifestDependency; + bool ManifestUAC = true; + std::vector<std::string> ManifestInput; + StringRef ManifestLevel = "'asInvoker'"; + StringRef ManifestUIAccess = "'false'"; + StringRef ManifestFile; + + // Used for /failifmismatch. + std::map<StringRef, StringRef> MustMatch; + + // Used for /alternatename. + std::map<StringRef, StringRef> AlternateNames; + + uint64_t ImageBase = -1; + uint64_t StackReserve = 1024 * 1024; + uint64_t StackCommit = 4096; + uint64_t HeapReserve = 1024 * 1024; + uint64_t HeapCommit = 4096; + uint32_t MajorImageVersion = 0; + uint32_t MinorImageVersion = 0; + uint32_t MajorOSVersion = 6; + uint32_t MinorOSVersion = 0; + bool DynamicBase = true; + bool AllowBind = true; + bool NxCompat = true; + bool AllowIsolation = true; + bool TerminalServerAware = true; + bool LargeAddressAware = false; + bool HighEntropyVA = false; + + // This is for debugging. 
+ bool DebugPdb = false; + bool DumpPdb = false; +}; + +extern Configuration *Config; + +} // namespace coff +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/COFF/DLL.cpp b/contrib/llvm/tools/lld/COFF/DLL.cpp new file mode 100644 index 000000000000..f93dc5cde44c --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/DLL.cpp @@ -0,0 +1,571 @@ +//===- DLL.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines various types of chunks for the DLL import or export +// descriptor tables. They are inherently Windows-specific. +// You need to read Microsoft PE/COFF spec to understand details +// about the data structures. +// +// If you are not particularly interested in linking against Windows +// DLL, you can skip this file, and you should still be able to +// understand the rest of the linker. +// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "DLL.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::COFF; + +namespace lld { +namespace coff { +namespace { + +// Import table + +static int ptrSize() { return Config->is64() ? 8 : 4; } + +// A chunk for the import descriptor table. +class HintNameChunk : public Chunk { +public: + HintNameChunk(StringRef N, uint16_t H) : Name(N), Hint(H) {} + + size_t getSize() const override { + // Starts with 2 byte Hint field, followed by a null-terminated string, + // ends with 0 or 1 byte padding. 
+ return alignTo(Name.size() + 3, 2); + } + + void writeTo(uint8_t *Buf) const override { + write16le(Buf + OutputSectionOff, Hint); + memcpy(Buf + OutputSectionOff + 2, Name.data(), Name.size()); + } + +private: + StringRef Name; + uint16_t Hint; +}; + +// A chunk for the import descriptor table. +class LookupChunk : public Chunk { +public: + explicit LookupChunk(Chunk *C) : HintName(C) {} + size_t getSize() const override { return ptrSize(); } + + void writeTo(uint8_t *Buf) const override { + write32le(Buf + OutputSectionOff, HintName->getRVA()); + } + + Chunk *HintName; +}; + +// A chunk for the import descriptor table. +// This chunk represent import-by-ordinal symbols. +// See Microsoft PE/COFF spec 7.1. Import Header for details. +class OrdinalOnlyChunk : public Chunk { +public: + explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) {} + size_t getSize() const override { return ptrSize(); } + + void writeTo(uint8_t *Buf) const override { + // An import-by-ordinal slot has MSB 1 to indicate that + // this is import-by-ordinal (and not import-by-name). + if (Config->is64()) { + write64le(Buf + OutputSectionOff, (1ULL << 63) | Ordinal); + } else { + write32le(Buf + OutputSectionOff, (1ULL << 31) | Ordinal); + } + } + + uint16_t Ordinal; +}; + +// A chunk for the import descriptor table. +class ImportDirectoryChunk : public Chunk { +public: + explicit ImportDirectoryChunk(Chunk *N) : DLLName(N) {} + size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); } + + void writeTo(uint8_t *Buf) const override { + auto *E = (coff_import_directory_table_entry *)(Buf + OutputSectionOff); + E->ImportLookupTableRVA = LookupTab->getRVA(); + E->NameRVA = DLLName->getRVA(); + E->ImportAddressTableRVA = AddressTab->getRVA(); + } + + Chunk *DLLName; + Chunk *LookupTab; + Chunk *AddressTab; +}; + +// A chunk representing null terminator in the import table. +// Contents of this chunk is always null bytes. 
+class NullChunk : public Chunk { +public: + explicit NullChunk(size_t N) : Size(N) {} + bool hasData() const override { return false; } + size_t getSize() const override { return Size; } + void setAlign(size_t N) { Align = N; } + +private: + size_t Size; +}; + +static std::vector<std::vector<DefinedImportData *>> +binImports(const std::vector<DefinedImportData *> &Imports) { + // Group DLL-imported symbols by DLL name because that's how + // symbols are layed out in the import descriptor table. + auto Less = [](const std::string &A, const std::string &B) { + return Config->DLLOrder[A] < Config->DLLOrder[B]; + }; + std::map<std::string, std::vector<DefinedImportData *>, + bool(*)(const std::string &, const std::string &)> M(Less); + for (DefinedImportData *Sym : Imports) + M[Sym->getDLLName().lower()].push_back(Sym); + + std::vector<std::vector<DefinedImportData *>> V; + for (auto &P : M) { + // Sort symbols by name for each group. + std::vector<DefinedImportData *> &Syms = P.second; + std::sort(Syms.begin(), Syms.end(), + [](DefinedImportData *A, DefinedImportData *B) { + return A->getName() < B->getName(); + }); + V.push_back(std::move(Syms)); + } + return V; +} + +// Export table +// See Microsoft PE/COFF spec 4.3 for details. + +// A chunk for the delay import descriptor table etnry. +class DelayDirectoryChunk : public Chunk { +public: + explicit DelayDirectoryChunk(Chunk *N) : DLLName(N) {} + + size_t getSize() const override { + return sizeof(delay_import_directory_table_entry); + } + + void writeTo(uint8_t *Buf) const override { + auto *E = (delay_import_directory_table_entry *)(Buf + OutputSectionOff); + E->Attributes = 1; + E->Name = DLLName->getRVA(); + E->ModuleHandle = ModuleHandle->getRVA(); + E->DelayImportAddressTable = AddressTab->getRVA(); + E->DelayImportNameTable = NameTab->getRVA(); + } + + Chunk *DLLName; + Chunk *ModuleHandle; + Chunk *AddressTab; + Chunk *NameTab; +}; + +// Initial contents for delay-loaded functions. 
// This code calls __delayLoadHelper2 function to resolve a symbol
// and then overwrites its jump table slot with the result
// for subsequent function calls.
//
// The zero bytes in the lea/push/call instructions are placeholders for
// 32-bit operands that are filled in when a thunk is written out.
static const uint8_t ThunkX64[] = {
    0x51,                               // push    rcx
    0x52,                               // push    rdx
    0x41, 0x50,                         // push    r8
    0x41, 0x51,                         // push    r9
    0x48, 0x83, 0xec, 0x48,             // sub     rsp, 48h
    0x66, 0x0f, 0x7f, 0x04, 0x24,       // movdqa  xmmword ptr [rsp], xmm0
    0x66, 0x0f, 0x7f, 0x4c, 0x24, 0x10, // movdqa  xmmword ptr [rsp+10h], xmm1
    0x66, 0x0f, 0x7f, 0x54, 0x24, 0x20, // movdqa  xmmword ptr [rsp+20h], xmm2
    0x66, 0x0f, 0x7f, 0x5c, 0x24, 0x30, // movdqa  xmmword ptr [rsp+30h], xmm3
    0x48, 0x8d, 0x15, 0x00, 0x00, 0x00, 0x00, // lea rdx, [__imp_<FUNCNAME>]
    0x48, 0x8d, 0x0d, 0x00, 0x00, 0x00, 0x00, // lea rcx, [___DELAY_IMPORT_...]
    0xe8, 0x00, 0x00, 0x00, 0x00,       // call    __delayLoadHelper2
    0x66, 0x0f, 0x6f, 0x04, 0x24,       // movdqa  xmm0, xmmword ptr [rsp]
    0x66, 0x0f, 0x6f, 0x4c, 0x24, 0x10, // movdqa  xmm1, xmmword ptr [rsp+10h]
    0x66, 0x0f, 0x6f, 0x54, 0x24, 0x20, // movdqa  xmm2, xmmword ptr [rsp+20h]
    0x66, 0x0f, 0x6f, 0x5c, 0x24, 0x30, // movdqa  xmm3, xmmword ptr [rsp+30h]
    0x48, 0x83, 0xc4, 0x48,             // add     rsp, 48h
    0x41, 0x59,                         // pop     r9
    0x41, 0x58,                         // pop     r8
    0x5a,                               // pop     rdx
    0x59,                               // pop     rcx
    0xff, 0xe0,                         // jmp     rax
};

// 32-bit x86 counterpart of the template above.
static const uint8_t ThunkX86[] = {
    0x51,                         // push  ecx
    0x52,                         // push  edx
    0x68, 0x00, 0x00, 0x00, 0x00, // push  offset ___imp__<FUNCNAME>
    0x68, 0x00, 0x00, 0x00, 0x00, // push  offset ___DELAY_IMPORT_DESCRIPTOR_<DLLNAME>_dll
    0xe8, 0x00, 0x00, 0x00, 0x00, // call  ___delayLoadHelper2@8
    0x5a,                         // pop   edx
    0x59,                         // pop   ecx
    0xff, 0xe0,                   // jmp   eax
};

// A chunk for the delay import thunk.
+class ThunkChunkX64 : public Chunk { +public: + ThunkChunkX64(Defined *I, Chunk *D, Defined *H) + : Imp(I), Desc(D), Helper(H) {} + + size_t getSize() const override { return sizeof(ThunkX64); } + + void writeTo(uint8_t *Buf) const override { + memcpy(Buf + OutputSectionOff, ThunkX64, sizeof(ThunkX64)); + write32le(Buf + OutputSectionOff + 36, Imp->getRVA() - RVA - 40); + write32le(Buf + OutputSectionOff + 43, Desc->getRVA() - RVA - 47); + write32le(Buf + OutputSectionOff + 48, Helper->getRVA() - RVA - 52); + } + + Defined *Imp = nullptr; + Chunk *Desc = nullptr; + Defined *Helper = nullptr; +}; + +class ThunkChunkX86 : public Chunk { +public: + ThunkChunkX86(Defined *I, Chunk *D, Defined *H) + : Imp(I), Desc(D), Helper(H) {} + + size_t getSize() const override { return sizeof(ThunkX86); } + + void writeTo(uint8_t *Buf) const override { + memcpy(Buf + OutputSectionOff, ThunkX86, sizeof(ThunkX86)); + write32le(Buf + OutputSectionOff + 3, Imp->getRVA() + Config->ImageBase); + write32le(Buf + OutputSectionOff + 8, Desc->getRVA() + Config->ImageBase); + write32le(Buf + OutputSectionOff + 13, Helper->getRVA() - RVA - 17); + } + + void getBaserels(std::vector<Baserel> *Res) override { + Res->emplace_back(RVA + 3); + Res->emplace_back(RVA + 8); + } + + Defined *Imp = nullptr; + Chunk *Desc = nullptr; + Defined *Helper = nullptr; +}; + +// A chunk for the import descriptor table. +class DelayAddressChunk : public Chunk { +public: + explicit DelayAddressChunk(Chunk *C) : Thunk(C) {} + size_t getSize() const override { return ptrSize(); } + + void writeTo(uint8_t *Buf) const override { + if (Config->is64()) { + write64le(Buf + OutputSectionOff, Thunk->getRVA() + Config->ImageBase); + } else { + write32le(Buf + OutputSectionOff, Thunk->getRVA() + Config->ImageBase); + } + } + + void getBaserels(std::vector<Baserel> *Res) override { + Res->emplace_back(RVA); + } + + Chunk *Thunk; +}; + +// Export table +// Read Microsoft PE/COFF spec 5.3 for details. 
+ +// A chunk for the export descriptor table. +class ExportDirectoryChunk : public Chunk { +public: + ExportDirectoryChunk(int I, int J, Chunk *D, Chunk *A, Chunk *N, Chunk *O) + : MaxOrdinal(I), NameTabSize(J), DLLName(D), AddressTab(A), NameTab(N), + OrdinalTab(O) {} + + size_t getSize() const override { + return sizeof(export_directory_table_entry); + } + + void writeTo(uint8_t *Buf) const override { + auto *E = (export_directory_table_entry *)(Buf + OutputSectionOff); + E->NameRVA = DLLName->getRVA(); + E->OrdinalBase = 0; + E->AddressTableEntries = MaxOrdinal + 1; + E->NumberOfNamePointers = NameTabSize; + E->ExportAddressTableRVA = AddressTab->getRVA(); + E->NamePointerRVA = NameTab->getRVA(); + E->OrdinalTableRVA = OrdinalTab->getRVA(); + } + + uint16_t MaxOrdinal; + uint16_t NameTabSize; + Chunk *DLLName; + Chunk *AddressTab; + Chunk *NameTab; + Chunk *OrdinalTab; +}; + +class AddressTableChunk : public Chunk { +public: + explicit AddressTableChunk(size_t MaxOrdinal) : Size(MaxOrdinal + 1) {} + size_t getSize() const override { return Size * 4; } + + void writeTo(uint8_t *Buf) const override { + for (Export &E : Config->Exports) { + uint8_t *P = Buf + OutputSectionOff + E.Ordinal * 4; + if (E.ForwardChunk) { + write32le(P, E.ForwardChunk->getRVA()); + } else { + write32le(P, cast<Defined>(E.Sym)->getRVA()); + } + } + } + +private: + size_t Size; +}; + +class NamePointersChunk : public Chunk { +public: + explicit NamePointersChunk(std::vector<Chunk *> &V) : Chunks(V) {} + size_t getSize() const override { return Chunks.size() * 4; } + + void writeTo(uint8_t *Buf) const override { + uint8_t *P = Buf + OutputSectionOff; + for (Chunk *C : Chunks) { + write32le(P, C->getRVA()); + P += 4; + } + } + +private: + std::vector<Chunk *> Chunks; +}; + +class ExportOrdinalChunk : public Chunk { +public: + explicit ExportOrdinalChunk(size_t I) : Size(I) {} + size_t getSize() const override { return Size * 2; } + + void writeTo(uint8_t *Buf) const override { + uint8_t *P 
= Buf + OutputSectionOff; + for (Export &E : Config->Exports) { + if (E.Noname) + continue; + write16le(P, E.Ordinal); + P += 2; + } + } + +private: + size_t Size; +}; + +} // anonymous namespace + +uint64_t IdataContents::getDirSize() { + return Dirs.size() * sizeof(ImportDirectoryTableEntry); +} + +uint64_t IdataContents::getIATSize() { + return Addresses.size() * ptrSize(); +} + +// Returns a list of .idata contents. +// See Microsoft PE/COFF spec 5.4 for details. +std::vector<Chunk *> IdataContents::getChunks() { + create(); + std::vector<Chunk *> V; + // The loader assumes a specific order of data. + // Add each type in the correct order. + for (std::unique_ptr<Chunk> &C : Dirs) + V.push_back(C.get()); + for (std::unique_ptr<Chunk> &C : Lookups) + V.push_back(C.get()); + for (std::unique_ptr<Chunk> &C : Addresses) + V.push_back(C.get()); + for (std::unique_ptr<Chunk> &C : Hints) + V.push_back(C.get()); + for (auto &P : DLLNames) { + std::unique_ptr<Chunk> &C = P.second; + V.push_back(C.get()); + } + return V; +} + +void IdataContents::create() { + std::vector<std::vector<DefinedImportData *>> V = binImports(Imports); + + // Create .idata contents for each DLL. + for (std::vector<DefinedImportData *> &Syms : V) { + StringRef Name = Syms[0]->getDLLName(); + + // Create lookup and address tables. If they have external names, + // we need to create HintName chunks to store the names. + // If they don't (if they are import-by-ordinals), we store only + // ordinal values to the table. 
+ size_t Base = Lookups.size(); + for (DefinedImportData *S : Syms) { + uint16_t Ord = S->getOrdinal(); + if (S->getExternalName().empty()) { + Lookups.push_back(make_unique<OrdinalOnlyChunk>(Ord)); + Addresses.push_back(make_unique<OrdinalOnlyChunk>(Ord)); + continue; + } + auto C = make_unique<HintNameChunk>(S->getExternalName(), Ord); + Lookups.push_back(make_unique<LookupChunk>(C.get())); + Addresses.push_back(make_unique<LookupChunk>(C.get())); + Hints.push_back(std::move(C)); + } + // Terminate with null values. + Lookups.push_back(make_unique<NullChunk>(ptrSize())); + Addresses.push_back(make_unique<NullChunk>(ptrSize())); + + for (int I = 0, E = Syms.size(); I < E; ++I) + Syms[I]->setLocation(Addresses[Base + I].get()); + + // Create the import table header. + if (!DLLNames.count(Name)) + DLLNames[Name] = make_unique<StringChunk>(Name); + auto Dir = make_unique<ImportDirectoryChunk>(DLLNames[Name].get()); + Dir->LookupTab = Lookups[Base].get(); + Dir->AddressTab = Addresses[Base].get(); + Dirs.push_back(std::move(Dir)); + } + // Add null terminator. 
+ Dirs.push_back(make_unique<NullChunk>(sizeof(ImportDirectoryTableEntry))); +} + +std::vector<Chunk *> DelayLoadContents::getChunks() { + std::vector<Chunk *> V; + for (std::unique_ptr<Chunk> &C : Dirs) + V.push_back(C.get()); + for (std::unique_ptr<Chunk> &C : Names) + V.push_back(C.get()); + for (std::unique_ptr<Chunk> &C : HintNames) + V.push_back(C.get()); + for (auto &P : DLLNames) { + std::unique_ptr<Chunk> &C = P.second; + V.push_back(C.get()); + } + return V; +} + +std::vector<Chunk *> DelayLoadContents::getDataChunks() { + std::vector<Chunk *> V; + for (std::unique_ptr<Chunk> &C : ModuleHandles) + V.push_back(C.get()); + for (std::unique_ptr<Chunk> &C : Addresses) + V.push_back(C.get()); + return V; +} + +uint64_t DelayLoadContents::getDirSize() { + return Dirs.size() * sizeof(delay_import_directory_table_entry); +} + +void DelayLoadContents::create(Defined *H) { + Helper = H; + std::vector<std::vector<DefinedImportData *>> V = binImports(Imports); + + // Create .didat contents for each DLL. + for (std::vector<DefinedImportData *> &Syms : V) { + StringRef Name = Syms[0]->getDLLName(); + + // Create the delay import table header. + if (!DLLNames.count(Name)) + DLLNames[Name] = make_unique<StringChunk>(Name); + auto Dir = make_unique<DelayDirectoryChunk>(DLLNames[Name].get()); + + size_t Base = Addresses.size(); + for (DefinedImportData *S : Syms) { + Chunk *T = newThunkChunk(S, Dir.get()); + auto A = make_unique<DelayAddressChunk>(T); + Addresses.push_back(std::move(A)); + Thunks.push_back(std::unique_ptr<Chunk>(T)); + StringRef ExtName = S->getExternalName(); + if (ExtName.empty()) { + Names.push_back(make_unique<OrdinalOnlyChunk>(S->getOrdinal())); + } else { + auto C = make_unique<HintNameChunk>(ExtName, 0); + Names.push_back(make_unique<LookupChunk>(C.get())); + HintNames.push_back(std::move(C)); + } + } + // Terminate with null values. 
+ Addresses.push_back(make_unique<NullChunk>(8)); + Names.push_back(make_unique<NullChunk>(8)); + + for (int I = 0, E = Syms.size(); I < E; ++I) + Syms[I]->setLocation(Addresses[Base + I].get()); + auto *MH = new NullChunk(8); + MH->setAlign(8); + ModuleHandles.push_back(std::unique_ptr<Chunk>(MH)); + + // Fill the delay import table header fields. + Dir->ModuleHandle = MH; + Dir->AddressTab = Addresses[Base].get(); + Dir->NameTab = Names[Base].get(); + Dirs.push_back(std::move(Dir)); + } + // Add null terminator. + Dirs.push_back( + make_unique<NullChunk>(sizeof(delay_import_directory_table_entry))); +} + +Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S, Chunk *Dir) { + switch (Config->Machine) { + case AMD64: + return new ThunkChunkX64(S, Dir, Helper); + case I386: + return new ThunkChunkX86(S, Dir, Helper); + default: + llvm_unreachable("unsupported machine type"); + } +} + +EdataContents::EdataContents() { + uint16_t MaxOrdinal = 0; + for (Export &E : Config->Exports) + MaxOrdinal = std::max(MaxOrdinal, E.Ordinal); + + auto *DLLName = new StringChunk(sys::path::filename(Config->OutputFile)); + auto *AddressTab = new AddressTableChunk(MaxOrdinal); + std::vector<Chunk *> Names; + for (Export &E : Config->Exports) + if (!E.Noname) + Names.push_back(new StringChunk(E.ExportName)); + + std::vector<Chunk *> Forwards; + for (Export &E : Config->Exports) { + if (E.ForwardTo.empty()) + continue; + E.ForwardChunk = new StringChunk(E.ForwardTo); + Forwards.push_back(E.ForwardChunk); + } + + auto *NameTab = new NamePointersChunk(Names); + auto *OrdinalTab = new ExportOrdinalChunk(Names.size()); + auto *Dir = new ExportDirectoryChunk(MaxOrdinal, Names.size(), DLLName, + AddressTab, NameTab, OrdinalTab); + Chunks.push_back(std::unique_ptr<Chunk>(Dir)); + Chunks.push_back(std::unique_ptr<Chunk>(DLLName)); + Chunks.push_back(std::unique_ptr<Chunk>(AddressTab)); + Chunks.push_back(std::unique_ptr<Chunk>(NameTab)); + 
Chunks.push_back(std::unique_ptr<Chunk>(OrdinalTab)); + for (Chunk *C : Names) + Chunks.push_back(std::unique_ptr<Chunk>(C)); + for (Chunk *C : Forwards) + Chunks.push_back(std::unique_ptr<Chunk>(C)); +} + +} // namespace coff +} // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/DLL.h b/contrib/llvm/tools/lld/COFF/DLL.h new file mode 100644 index 000000000000..83a12df185c2 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/DLL.h @@ -0,0 +1,84 @@ +//===- DLL.h ----------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_DLL_H +#define LLD_COFF_DLL_H + +#include "Chunks.h" +#include "Symbols.h" + +namespace lld { +namespace coff { + +// Windows-specific. +// IdataContents creates all chunks for the DLL import table. +// You are supposed to call add() to add symbols and then +// call getChunks() to get a list of chunks. +class IdataContents { +public: + void add(DefinedImportData *Sym) { Imports.push_back(Sym); } + bool empty() { return Imports.empty(); } + std::vector<Chunk *> getChunks(); + + uint64_t getDirRVA() { return Dirs[0]->getRVA(); } + uint64_t getDirSize(); + uint64_t getIATRVA() { return Addresses[0]->getRVA(); } + uint64_t getIATSize(); + +private: + void create(); + + std::vector<DefinedImportData *> Imports; + std::vector<std::unique_ptr<Chunk>> Dirs; + std::vector<std::unique_ptr<Chunk>> Lookups; + std::vector<std::unique_ptr<Chunk>> Addresses; + std::vector<std::unique_ptr<Chunk>> Hints; + std::map<StringRef, std::unique_ptr<Chunk>> DLLNames; +}; + +// Windows-specific. +// DelayLoadContents creates all chunks for the delay-load DLL import table. 
+class DelayLoadContents { +public: + void add(DefinedImportData *Sym) { Imports.push_back(Sym); } + bool empty() { return Imports.empty(); } + void create(Defined *Helper); + std::vector<Chunk *> getChunks(); + std::vector<Chunk *> getDataChunks(); + std::vector<std::unique_ptr<Chunk>> &getCodeChunks() { return Thunks; } + + uint64_t getDirRVA() { return Dirs[0]->getRVA(); } + uint64_t getDirSize(); + +private: + Chunk *newThunkChunk(DefinedImportData *S, Chunk *Dir); + + Defined *Helper; + std::vector<DefinedImportData *> Imports; + std::vector<std::unique_ptr<Chunk>> Dirs; + std::vector<std::unique_ptr<Chunk>> ModuleHandles; + std::vector<std::unique_ptr<Chunk>> Addresses; + std::vector<std::unique_ptr<Chunk>> Names; + std::vector<std::unique_ptr<Chunk>> HintNames; + std::vector<std::unique_ptr<Chunk>> Thunks; + std::map<StringRef, std::unique_ptr<Chunk>> DLLNames; +}; + +// Windows-specific. +// EdataContents creates all chunks for the DLL export table. +class EdataContents { +public: + EdataContents(); + std::vector<std::unique_ptr<Chunk>> Chunks; +}; + +} // namespace coff +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/COFF/Driver.cpp b/contrib/llvm/tools/lld/COFF/Driver.cpp new file mode 100644 index 000000000000..4dabd9ebcc6d --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Driver.cpp @@ -0,0 +1,865 @@ +//===- Driver.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Config.h" +#include "Error.h" +#include "InputFiles.h" +#include "Memory.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "Writer.h" +#include "lld/Driver/Driver.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/LibDriver/LibDriver.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/TarWriter.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <memory> + +#ifdef _MSC_VER +// <future> depends on <eh.h> for __uncaught_exception. +#include <eh.h> +#endif + +#include <future> + +using namespace llvm; +using namespace llvm::COFF; +using llvm::sys::Process; +using llvm::sys::fs::OpenFlags; +using llvm::sys::fs::file_magic; +using llvm::sys::fs::identify_magic; + +namespace lld { +namespace coff { + +Configuration *Config; +LinkerDriver *Driver; + +BumpPtrAllocator BAlloc; +StringSaver Saver{BAlloc}; +std::vector<SpecificAllocBase *> SpecificAllocBase::Instances; + +bool link(ArrayRef<const char *> Args) { + Config = make<Configuration>(); + Driver = make<LinkerDriver>(); + Driver->link(Args); + return true; +} + +// Drop directory components and replace extension with ".exe" or ".dll". +static std::string getOutputPath(StringRef Path) { + auto P = Path.find_last_of("\\/"); + StringRef S = (P == StringRef::npos) ? Path : Path.substr(P + 1); + const char* E = Config->DLL ? ".dll" : ".exe"; + return (S.substr(0, S.rfind('.')) + E).str(); +} + +// ErrorOr is not default constructible, so it cannot be used as the type +// parameter of a future. 
+// FIXME: We could open the file in createFutureForFile and avoid needing to +// return an error here, but for the moment that would cost us a file descriptor +// (a limited resource on Windows) for the duration that the future is pending. +typedef std::pair<std::unique_ptr<MemoryBuffer>, std::error_code> MBErrPair; + +// Create a std::future that opens and maps a file using the best strategy for +// the host platform. +static std::future<MBErrPair> createFutureForFile(std::string Path) { +#if LLVM_ON_WIN32 + // On Windows, file I/O is relatively slow so it is best to do this + // asynchronously. + auto Strategy = std::launch::async; +#else + auto Strategy = std::launch::deferred; +#endif + return std::async(Strategy, [=]() { + auto MBOrErr = MemoryBuffer::getFile(Path); + if (!MBOrErr) + return MBErrPair{nullptr, MBOrErr.getError()}; + return MBErrPair{std::move(*MBOrErr), std::error_code()}; + }); +} + +MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr<MemoryBuffer> MB) { + MemoryBufferRef MBRef = *MB; + OwningMBs.push_back(std::move(MB)); + + if (Driver->Tar) + Driver->Tar->append(relativeToRoot(MBRef.getBufferIdentifier()), + MBRef.getBuffer()); + + return MBRef; +} + +void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB) { + MemoryBufferRef MBRef = takeBuffer(std::move(MB)); + + // File type is detected by contents, not by file extension. + file_magic Magic = identify_magic(MBRef.getBuffer()); + if (Magic == file_magic::windows_resource) { + Resources.push_back(MBRef); + return; + } + + FilePaths.push_back(MBRef.getBufferIdentifier()); + if (Magic == file_magic::archive) + return Symtab.addFile(make<ArchiveFile>(MBRef)); + if (Magic == file_magic::bitcode) + return Symtab.addFile(make<BitcodeFile>(MBRef)); + if (Magic == file_magic::coff_cl_gl_object) + fatal(MBRef.getBufferIdentifier() + ": is not a native COFF file. 
" + "Recompile without /GL"); + Symtab.addFile(make<ObjectFile>(MBRef)); +} + +void LinkerDriver::enqueuePath(StringRef Path) { + auto Future = + std::make_shared<std::future<MBErrPair>>(createFutureForFile(Path)); + std::string PathStr = Path; + enqueueTask([=]() { + auto MBOrErr = Future->get(); + if (MBOrErr.second) + fatal(MBOrErr.second, "could not open " + PathStr); + Driver->addBuffer(std::move(MBOrErr.first)); + }); + + if (Config->OutputFile == "") + Config->OutputFile = getOutputPath(Path); +} + +void LinkerDriver::addArchiveBuffer(MemoryBufferRef MB, StringRef SymName, + StringRef ParentName) { + file_magic Magic = identify_magic(MB.getBuffer()); + if (Magic == file_magic::coff_import_library) { + Symtab.addFile(make<ImportFile>(MB)); + return; + } + + InputFile *Obj; + if (Magic == file_magic::coff_object) + Obj = make<ObjectFile>(MB); + else if (Magic == file_magic::bitcode) + Obj = make<BitcodeFile>(MB); + else + fatal("unknown file type: " + MB.getBufferIdentifier()); + + Obj->ParentName = ParentName; + Symtab.addFile(Obj); + if (Config->Verbose) + outs() << "Loaded " << toString(Obj) << " for " << SymName << "\n"; +} + +void LinkerDriver::enqueueArchiveMember(const Archive::Child &C, + StringRef SymName, + StringRef ParentName) { + if (!C.getParent()->isThin()) { + MemoryBufferRef MB = check( + C.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + SymName); + enqueueTask([=]() { Driver->addArchiveBuffer(MB, SymName, ParentName); }); + return; + } + + auto Future = std::make_shared<std::future<MBErrPair>>(createFutureForFile( + check(C.getFullName(), + "could not get the filename for the member defining symbol " + + SymName))); + enqueueTask([=]() { + auto MBOrErr = Future->get(); + if (MBOrErr.second) + fatal(MBOrErr.second, + "could not get the buffer for the member defining " + SymName); + Driver->addArchiveBuffer(takeBuffer(std::move(MBOrErr.first)), SymName, + ParentName); + }); +} + +static bool 
isDecorated(StringRef Sym) { + return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?"); +} + +// Parses .drectve section contents and returns a list of files +// specified by /defaultlib. +void LinkerDriver::parseDirectives(StringRef S) { + opt::InputArgList Args = Parser.parse(S); + + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_alternatename: + parseAlternateName(Arg->getValue()); + break; + case OPT_defaultlib: + if (Optional<StringRef> Path = findLib(Arg->getValue())) + enqueuePath(*Path); + break; + case OPT_export: { + Export E = parseExport(Arg->getValue()); + E.Directives = true; + Config->Exports.push_back(E); + break; + } + case OPT_failifmismatch: + checkFailIfMismatch(Arg->getValue()); + break; + case OPT_incl: + addUndefined(Arg->getValue()); + break; + case OPT_merge: + parseMerge(Arg->getValue()); + break; + case OPT_nodefaultlib: + Config->NoDefaultLibs.insert(doFindLib(Arg->getValue())); + break; + case OPT_section: + parseSection(Arg->getValue()); + break; + case OPT_editandcontinue: + case OPT_fastfail: + case OPT_guardsym: + case OPT_throwingnew: + break; + default: + fatal(Arg->getSpelling() + " is not allowed in .drectve"); + } + } +} + +// Find file from search paths. You can omit ".obj", this function takes +// care of that. Note that the returned path is not guaranteed to exist. +StringRef LinkerDriver::doFindFile(StringRef Filename) { + bool HasPathSep = (Filename.find_first_of("/\\") != StringRef::npos); + if (HasPathSep) + return Filename; + bool HasExt = (Filename.find('.') != StringRef::npos); + for (StringRef Dir : SearchPaths) { + SmallString<128> Path = Dir; + sys::path::append(Path, Filename); + if (sys::fs::exists(Path.str())) + return Saver.save(Path.str()); + if (!HasExt) { + Path.append(".obj"); + if (sys::fs::exists(Path.str())) + return Saver.save(Path.str()); + } + } + return Filename; +} + +// Resolves a file path. 
This never returns the same path +// (in that case, it returns None). +Optional<StringRef> LinkerDriver::findFile(StringRef Filename) { + StringRef Path = doFindFile(Filename); + bool Seen = !VisitedFiles.insert(Path.lower()).second; + if (Seen) + return None; + return Path; +} + +// Find library file from search path. +StringRef LinkerDriver::doFindLib(StringRef Filename) { + // Add ".lib" to Filename if that has no file extension. + bool HasExt = (Filename.find('.') != StringRef::npos); + if (!HasExt) + Filename = Saver.save(Filename + ".lib"); + return doFindFile(Filename); +} + +// Resolves a library path. /nodefaultlib options are taken into +// consideration. This never returns the same path (in that case, +// it returns None). +Optional<StringRef> LinkerDriver::findLib(StringRef Filename) { + if (Config->NoDefaultLibAll) + return None; + if (!VisitedLibs.insert(Filename.lower()).second) + return None; + StringRef Path = doFindLib(Filename); + if (Config->NoDefaultLibs.count(Path)) + return None; + if (!VisitedFiles.insert(Path.lower()).second) + return None; + return Path; +} + +// Parses LIB environment which contains a list of search paths. +void LinkerDriver::addLibSearchPaths() { + Optional<std::string> EnvOpt = Process::GetEnv("LIB"); + if (!EnvOpt.hasValue()) + return; + StringRef Env = Saver.save(*EnvOpt); + while (!Env.empty()) { + StringRef Path; + std::tie(Path, Env) = Env.split(';'); + SearchPaths.push_back(Path); + } +} + +SymbolBody *LinkerDriver::addUndefined(StringRef Name) { + SymbolBody *B = Symtab.addUndefined(Name); + Config->GCRoot.insert(B); + return B; +} + +// Symbol names are mangled by appending "_" prefix on x86. +StringRef LinkerDriver::mangle(StringRef Sym) { + assert(Config->Machine != IMAGE_FILE_MACHINE_UNKNOWN); + if (Config->Machine == I386) + return Saver.save("_" + Sym); + return Sym; +} + +// Windows specific -- find default entry point name. 
+StringRef LinkerDriver::findDefaultEntry() { + // User-defined main functions and their corresponding entry points. + static const char *Entries[][2] = { + {"main", "mainCRTStartup"}, + {"wmain", "wmainCRTStartup"}, + {"WinMain", "WinMainCRTStartup"}, + {"wWinMain", "wWinMainCRTStartup"}, + }; + for (auto E : Entries) { + StringRef Entry = Symtab.findMangle(mangle(E[0])); + if (!Entry.empty() && !isa<Undefined>(Symtab.find(Entry)->body())) + return mangle(E[1]); + } + return ""; +} + +WindowsSubsystem LinkerDriver::inferSubsystem() { + if (Config->DLL) + return IMAGE_SUBSYSTEM_WINDOWS_GUI; + if (Symtab.findUnderscore("main") || Symtab.findUnderscore("wmain")) + return IMAGE_SUBSYSTEM_WINDOWS_CUI; + if (Symtab.findUnderscore("WinMain") || Symtab.findUnderscore("wWinMain")) + return IMAGE_SUBSYSTEM_WINDOWS_GUI; + return IMAGE_SUBSYSTEM_UNKNOWN; +} + +static uint64_t getDefaultImageBase() { + if (Config->is64()) + return Config->DLL ? 0x180000000 : 0x140000000; + return Config->DLL ? 0x10000000 : 0x400000; +} + +static std::string createResponseFile(const opt::InputArgList &Args, + ArrayRef<StringRef> FilePaths, + ArrayRef<StringRef> SearchPaths) { + SmallString<0> Data; + raw_svector_ostream OS(Data); + + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_linkrepro: + case OPT_INPUT: + case OPT_defaultlib: + case OPT_libpath: + break; + default: + OS << toString(Arg) << "\n"; + } + } + + for (StringRef Path : SearchPaths) { + std::string RelPath = relativeToRoot(Path); + OS << "/libpath:" << quote(RelPath) << "\n"; + } + + for (StringRef Path : FilePaths) + OS << quote(relativeToRoot(Path)) << "\n"; + + return Data.str(); +} + +static unsigned getDefaultDebugType(const opt::InputArgList &Args) { + unsigned DebugTypes = static_cast<unsigned>(DebugType::CV); + if (Args.hasArg(OPT_driver)) + DebugTypes |= static_cast<unsigned>(DebugType::PData); + if (Args.hasArg(OPT_profile)) + DebugTypes |= static_cast<unsigned>(DebugType::Fixup); + return 
DebugTypes; +} + +static unsigned parseDebugType(StringRef Arg) { + SmallVector<StringRef, 3> Types; + Arg.split(Types, ',', /*KeepEmpty=*/false); + + unsigned DebugTypes = static_cast<unsigned>(DebugType::None); + for (StringRef Type : Types) + DebugTypes |= StringSwitch<unsigned>(Type.lower()) + .Case("cv", static_cast<unsigned>(DebugType::CV)) + .Case("pdata", static_cast<unsigned>(DebugType::PData)) + .Case("fixup", static_cast<unsigned>(DebugType::Fixup)); + return DebugTypes; +} + +static std::string getMapFile(const opt::InputArgList &Args) { + auto *Arg = Args.getLastArg(OPT_lldmap, OPT_lldmap_file); + if (!Arg) + return ""; + if (Arg->getOption().getID() == OPT_lldmap_file) + return Arg->getValue(); + + assert(Arg->getOption().getID() == OPT_lldmap); + StringRef OutFile = Config->OutputFile; + return (OutFile.substr(0, OutFile.rfind('.')) + ".map").str(); +} + +void LinkerDriver::enqueueTask(std::function<void()> Task) { + TaskQueue.push_back(std::move(Task)); +} + +bool LinkerDriver::run() { + bool DidWork = !TaskQueue.empty(); + while (!TaskQueue.empty()) { + TaskQueue.front()(); + TaskQueue.pop_front(); + } + return DidWork; +} + +void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { + // If the first command line argument is "/lib", link.exe acts like lib.exe. + // We call our own implementation of lib.exe that understands bitcode files. + if (ArgsArr.size() > 1 && StringRef(ArgsArr[1]).equals_lower("/lib")) { + if (llvm::libDriverMain(ArgsArr.slice(1)) != 0) + fatal("lib failed"); + return; + } + + // Needed for LTO. + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); + + // Parse command line options. 
+ opt::InputArgList Args = Parser.parseLINK(ArgsArr.slice(1)); + + // Handle /help + if (Args.hasArg(OPT_help)) { + printHelp(ArgsArr[0]); + return; + } + + if (auto *Arg = Args.getLastArg(OPT_linkrepro)) { + SmallString<64> Path = StringRef(Arg->getValue()); + sys::path::append(Path, "repro.tar"); + + Expected<std::unique_ptr<TarWriter>> ErrOrWriter = + TarWriter::create(Path, "repro"); + + if (ErrOrWriter) { + Tar = std::move(*ErrOrWriter); + } else { + errs() << "/linkrepro: failed to open " << Path << ": " + << toString(ErrOrWriter.takeError()) << '\n'; + } + } + + if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) + fatal("no input files"); + + // Construct search path list. + SearchPaths.push_back(""); + for (auto *Arg : Args.filtered(OPT_libpath)) + SearchPaths.push_back(Arg->getValue()); + addLibSearchPaths(); + + // Handle /out + if (auto *Arg = Args.getLastArg(OPT_out)) + Config->OutputFile = Arg->getValue(); + + // Handle /verbose + if (Args.hasArg(OPT_verbose)) + Config->Verbose = true; + + // Handle /force or /force:unresolved + if (Args.hasArg(OPT_force) || Args.hasArg(OPT_force_unresolved)) + Config->Force = true; + + // Handle /debug + if (Args.hasArg(OPT_debug)) { + Config->Debug = true; + Config->DebugTypes = + Args.hasArg(OPT_debugtype) + ? parseDebugType(Args.getLastArg(OPT_debugtype)->getValue()) + : getDefaultDebugType(Args); + } + + // Create a dummy PDB file to satisfy build sytem rules. 
+ if (auto *Arg = Args.getLastArg(OPT_pdb)) + Config->PDBPath = Arg->getValue(); + + // Handle /noentry + if (Args.hasArg(OPT_noentry)) { + if (!Args.hasArg(OPT_dll)) + fatal("/noentry must be specified with /dll"); + Config->NoEntry = true; + } + + // Handle /dll + if (Args.hasArg(OPT_dll)) { + Config->DLL = true; + Config->ManifestID = 2; + } + + // Handle /fixed + if (Args.hasArg(OPT_fixed)) { + if (Args.hasArg(OPT_dynamicbase)) + fatal("/fixed must not be specified with /dynamicbase"); + Config->Relocatable = false; + Config->DynamicBase = false; + } + + // Handle /machine + if (auto *Arg = Args.getLastArg(OPT_machine)) + Config->Machine = getMachineType(Arg->getValue()); + + // Handle /nodefaultlib:<filename> + for (auto *Arg : Args.filtered(OPT_nodefaultlib)) + Config->NoDefaultLibs.insert(doFindLib(Arg->getValue())); + + // Handle /nodefaultlib + if (Args.hasArg(OPT_nodefaultlib_all)) + Config->NoDefaultLibAll = true; + + // Handle /base + if (auto *Arg = Args.getLastArg(OPT_base)) + parseNumbers(Arg->getValue(), &Config->ImageBase); + + // Handle /stack + if (auto *Arg = Args.getLastArg(OPT_stack)) + parseNumbers(Arg->getValue(), &Config->StackReserve, &Config->StackCommit); + + // Handle /heap + if (auto *Arg = Args.getLastArg(OPT_heap)) + parseNumbers(Arg->getValue(), &Config->HeapReserve, &Config->HeapCommit); + + // Handle /version + if (auto *Arg = Args.getLastArg(OPT_version)) + parseVersion(Arg->getValue(), &Config->MajorImageVersion, + &Config->MinorImageVersion); + + // Handle /subsystem + if (auto *Arg = Args.getLastArg(OPT_subsystem)) + parseSubsystem(Arg->getValue(), &Config->Subsystem, &Config->MajorOSVersion, + &Config->MinorOSVersion); + + // Handle /alternatename + for (auto *Arg : Args.filtered(OPT_alternatename)) + parseAlternateName(Arg->getValue()); + + // Handle /include + for (auto *Arg : Args.filtered(OPT_incl)) + addUndefined(Arg->getValue()); + + // Handle /implib + if (auto *Arg = Args.getLastArg(OPT_implib)) + Config->Implib = 
Arg->getValue(); + + // Handle /opt + for (auto *Arg : Args.filtered(OPT_opt)) { + std::string Str = StringRef(Arg->getValue()).lower(); + SmallVector<StringRef, 1> Vec; + StringRef(Str).split(Vec, ','); + for (StringRef S : Vec) { + if (S == "noref") { + Config->DoGC = false; + Config->DoICF = false; + continue; + } + if (S == "icf" || StringRef(S).startswith("icf=")) { + Config->DoICF = true; + continue; + } + if (S == "noicf") { + Config->DoICF = false; + continue; + } + if (StringRef(S).startswith("lldlto=")) { + StringRef OptLevel = StringRef(S).substr(7); + if (OptLevel.getAsInteger(10, Config->LTOOptLevel) || + Config->LTOOptLevel > 3) + fatal("/opt:lldlto: invalid optimization level: " + OptLevel); + continue; + } + if (StringRef(S).startswith("lldltojobs=")) { + StringRef Jobs = StringRef(S).substr(11); + if (Jobs.getAsInteger(10, Config->LTOJobs) || Config->LTOJobs == 0) + fatal("/opt:lldltojobs: invalid job count: " + Jobs); + continue; + } + if (S != "ref" && S != "lbr" && S != "nolbr") + fatal("/opt: unknown option: " + S); + } + } + + // Handle /failifmismatch + for (auto *Arg : Args.filtered(OPT_failifmismatch)) + checkFailIfMismatch(Arg->getValue()); + + // Handle /merge + for (auto *Arg : Args.filtered(OPT_merge)) + parseMerge(Arg->getValue()); + + // Handle /section + for (auto *Arg : Args.filtered(OPT_section)) + parseSection(Arg->getValue()); + + // Handle /manifest + if (auto *Arg = Args.getLastArg(OPT_manifest_colon)) + parseManifest(Arg->getValue()); + + // Handle /manifestuac + if (auto *Arg = Args.getLastArg(OPT_manifestuac)) + parseManifestUAC(Arg->getValue()); + + // Handle /manifestdependency + if (auto *Arg = Args.getLastArg(OPT_manifestdependency)) + Config->ManifestDependency = Arg->getValue(); + + // Handle /manifestfile + if (auto *Arg = Args.getLastArg(OPT_manifestfile)) + Config->ManifestFile = Arg->getValue(); + + // Handle /manifestinput + for (auto *Arg : Args.filtered(OPT_manifestinput)) + 
Config->ManifestInput.push_back(Arg->getValue()); + + // Handle miscellaneous boolean flags. + if (Args.hasArg(OPT_allowbind_no)) + Config->AllowBind = false; + if (Args.hasArg(OPT_allowisolation_no)) + Config->AllowIsolation = false; + if (Args.hasArg(OPT_dynamicbase_no)) + Config->DynamicBase = false; + if (Args.hasArg(OPT_nxcompat_no)) + Config->NxCompat = false; + if (Args.hasArg(OPT_tsaware_no)) + Config->TerminalServerAware = false; + if (Args.hasArg(OPT_nosymtab)) + Config->WriteSymtab = false; + Config->DumpPdb = Args.hasArg(OPT_dumppdb); + Config->DebugPdb = Args.hasArg(OPT_debugpdb); + + // Create a list of input files. Files can be given as arguments + // for /defaultlib option. + std::vector<MemoryBufferRef> MBs; + for (auto *Arg : Args.filtered(OPT_INPUT)) + if (Optional<StringRef> Path = findFile(Arg->getValue())) + enqueuePath(*Path); + for (auto *Arg : Args.filtered(OPT_defaultlib)) + if (Optional<StringRef> Path = findLib(Arg->getValue())) + enqueuePath(*Path); + + // Windows specific -- Create a resource file containing a manifest file. + if (Config->Manifest == Configuration::Embed) + addBuffer(createManifestRes()); + + // Read all input files given via the command line. + run(); + + // We should have inferred a machine type by now from the input files, but if + // not we assume x64. + if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { + errs() << "warning: /machine is not specified. x64 is assumed.\n"; + Config->Machine = AMD64; + } + + // Windows specific -- Input files can be Windows resource files (.res files). + // We invoke cvtres.exe to convert resource files to a regular COFF file + // then link the result file normally. 
+ if (!Resources.empty()) + addBuffer(convertResToCOFF(Resources)); + + if (Tar) + Tar->append("response.txt", + createResponseFile(Args, FilePaths, + ArrayRef<StringRef>(SearchPaths).slice(1))); + + // Handle /largeaddressaware + if (Config->is64() || Args.hasArg(OPT_largeaddressaware)) + Config->LargeAddressAware = true; + + // Handle /highentropyva + if (Config->is64() && !Args.hasArg(OPT_highentropyva_no)) + Config->HighEntropyVA = true; + + // Handle /entry and /dll + if (auto *Arg = Args.getLastArg(OPT_entry)) { + Config->Entry = addUndefined(mangle(Arg->getValue())); + } else if (Args.hasArg(OPT_dll) && !Config->NoEntry) { + StringRef S = (Config->Machine == I386) ? "__DllMainCRTStartup@12" + : "_DllMainCRTStartup"; + Config->Entry = addUndefined(S); + } else if (!Config->NoEntry) { + // Windows specific -- If entry point name is not given, we need to + // infer that from user-defined entry name. + StringRef S = findDefaultEntry(); + if (S.empty()) + fatal("entry point must be defined"); + Config->Entry = addUndefined(S); + if (Config->Verbose) + outs() << "Entry name inferred: " << S << "\n"; + } + + // Handle /export + for (auto *Arg : Args.filtered(OPT_export)) { + Export E = parseExport(Arg->getValue()); + if (Config->Machine == I386) { + if (!isDecorated(E.Name)) + E.Name = Saver.save("_" + E.Name); + if (!E.ExtName.empty() && !isDecorated(E.ExtName)) + E.ExtName = Saver.save("_" + E.ExtName); + } + Config->Exports.push_back(E); + } + + // Handle /def + if (auto *Arg = Args.getLastArg(OPT_deffile)) { + // parseModuleDefs mutates Config object. 
+ parseModuleDefs( + takeBuffer(check(MemoryBuffer::getFile(Arg->getValue()), + Twine("could not open ") + Arg->getValue()))); + } + + // Handle /delayload + for (auto *Arg : Args.filtered(OPT_delayload)) { + Config->DelayLoads.insert(StringRef(Arg->getValue()).lower()); + if (Config->Machine == I386) { + Config->DelayLoadHelper = addUndefined("___delayLoadHelper2@8"); + } else { + Config->DelayLoadHelper = addUndefined("__delayLoadHelper2"); + } + } + + // Set default image base if /base is not given. + if (Config->ImageBase == uint64_t(-1)) + Config->ImageBase = getDefaultImageBase(); + + Symtab.addRelative(mangle("__ImageBase"), 0); + if (Config->Machine == I386) { + Config->SEHTable = Symtab.addRelative("___safe_se_handler_table", 0); + Config->SEHCount = Symtab.addAbsolute("___safe_se_handler_count", 0); + } + + // We do not support /guard:cf (control flow protection) yet. + // Define CFG symbols anyway so that we can link MSVC 2015 CRT. + Symtab.addAbsolute(mangle("__guard_fids_table"), 0); + Symtab.addAbsolute(mangle("__guard_fids_count"), 0); + Symtab.addAbsolute(mangle("__guard_flags"), 0x100); + + // This code may add new undefined symbols to the link, which may enqueue more + // symbol resolution tasks, so we need to continue executing tasks until we + // converge. + do { + // Windows specific -- if entry point is not found, + // search for its mangled names. + if (Config->Entry) + Symtab.mangleMaybe(Config->Entry); + + // Windows specific -- Make sure we resolve all dllexported symbols. + for (Export &E : Config->Exports) { + if (!E.ForwardTo.empty()) + continue; + E.Sym = addUndefined(E.Name); + if (!E.Directives) + Symtab.mangleMaybe(E.Sym); + } + + // Add weak aliases. Weak aliases is a mechanism to give remaining + // undefined symbols final chance to be resolved successfully. 
+ for (auto Pair : Config->AlternateNames) { + StringRef From = Pair.first; + StringRef To = Pair.second; + Symbol *Sym = Symtab.find(From); + if (!Sym) + continue; + if (auto *U = dyn_cast<Undefined>(Sym->body())) + if (!U->WeakAlias) + U->WeakAlias = Symtab.addUndefined(To); + } + + // Windows specific -- if __load_config_used can be resolved, resolve it. + if (Symtab.findUnderscore("_load_config_used")) + addUndefined(mangle("_load_config_used")); + } while (run()); + + // Do LTO by compiling bitcode input files to a set of native COFF files then + // link those files. + Symtab.addCombinedLTOObjects(); + run(); + + // Make sure we have resolved all symbols. + Symtab.reportRemainingUndefines(); + + // Windows specific -- if no /subsystem is given, we need to infer + // that from entry point name. + if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { + Config->Subsystem = inferSubsystem(); + if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) + fatal("subsystem must be defined"); + } + + // Handle /safeseh. + if (Args.hasArg(OPT_safeseh)) + for (ObjectFile *File : Symtab.ObjectFiles) + if (!File->SEHCompat) + fatal("/safeseh: " + File->getName() + " is not compatible with SEH"); + + // Windows specific -- when we are creating a .dll file, we also + // need to create a .lib file. + if (!Config->Exports.empty() || Config->DLL) { + fixupExports(); + writeImportLibrary(); + assignExportOrdinals(); + } + + // Windows specific -- Create a side-by-side manifest file. + if (Config->Manifest == Configuration::SideBySide) + createSideBySideManifest(); + + // Identify unreferenced COMDAT sections. + if (Config->DoGC) + markLive(Symtab.getChunks()); + + // Identify identical COMDAT sections to merge them. + if (Config->DoICF) + doICF(Symtab.getChunks()); + + // Write the result. + writeResult(&Symtab); + + // Create a symbol map file containing symbol VAs and their names + // to help debugging. 
+ std::string MapFile = getMapFile(Args); + if (!MapFile.empty()) { + std::error_code EC; + raw_fd_ostream Out(MapFile, EC, OpenFlags::F_Text); + if (EC) + fatal(EC, "could not create the symbol map " + MapFile); + Symtab.printMap(Out); + } + + // Call exit to avoid calling destructors. + exit(0); +} + +} // namespace coff +} // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Driver.h b/contrib/llvm/tools/lld/COFF/Driver.h new file mode 100644 index 000000000000..44894269fcbe --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Driver.h @@ -0,0 +1,192 @@ +//===- Driver.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_DRIVER_H +#define LLD_COFF_DRIVER_H + +#include "Config.h" +#include "SymbolTable.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/COFF.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/TarWriter.h" +#include <memory> +#include <set> +#include <vector> + +namespace lld { +namespace coff { + +class LinkerDriver; +extern LinkerDriver *Driver; + +using llvm::COFF::MachineTypes; +using llvm::COFF::WindowsSubsystem; +using llvm::Optional; +class InputFile; + +// Implemented in MarkLive.cpp. +void markLive(const std::vector<Chunk *> &Chunks); + +// Implemented in ICF.cpp. +void doICF(const std::vector<Chunk *> &Chunks); + +class ArgParser { +public: + // Parses command line options. + llvm::opt::InputArgList parse(llvm::ArrayRef<const char *> Args); + + // Concatenate LINK environment varirable and given arguments and parse them. 
+ llvm::opt::InputArgList parseLINK(llvm::ArrayRef<const char *> Args); + + // Tokenizes a given string and then parses as command line options. + llvm::opt::InputArgList parse(StringRef S) { return parse(tokenize(S)); } + +private: + std::vector<const char *> tokenize(StringRef S); + + std::vector<const char *> replaceResponseFiles(std::vector<const char *>); +}; + +class LinkerDriver { +public: + LinkerDriver() { coff::Symtab = &Symtab; } + void link(llvm::ArrayRef<const char *> Args); + + // Used by the resolver to parse .drectve section contents. + void parseDirectives(StringRef S); + + // Used by ArchiveFile to enqueue members. + void enqueueArchiveMember(const Archive::Child &C, StringRef SymName, + StringRef ParentName); + +private: + ArgParser Parser; + SymbolTable Symtab; + + std::unique_ptr<llvm::TarWriter> Tar; // for /linkrepro + + // Opens a file. Path has to be resolved already. + MemoryBufferRef openFile(StringRef Path); + + // Searches a file from search paths. + Optional<StringRef> findFile(StringRef Filename); + Optional<StringRef> findLib(StringRef Filename); + StringRef doFindFile(StringRef Filename); + StringRef doFindLib(StringRef Filename); + + // Parses LIB environment which contains a list of search paths. + void addLibSearchPaths(); + + // Library search path. The first element is always "" (current directory). + std::vector<StringRef> SearchPaths; + std::set<std::string> VisitedFiles; + std::set<std::string> VisitedLibs; + + SymbolBody *addUndefined(StringRef Sym); + StringRef mangle(StringRef Sym); + + // Windows specific -- "main" is not the only main function in Windows. + // You can choose one from these four -- {w,}{WinMain,main}. + // There are four different entry point functions for them, + // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to + // choose the right one depending on which "main" function is defined. + // This function looks up the symbol table and resolve corresponding + // entry point name. 
+ StringRef findDefaultEntry(); + WindowsSubsystem inferSubsystem(); + + MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> MB); + void addBuffer(std::unique_ptr<MemoryBuffer> MB); + void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName, + StringRef ParentName); + + void enqueuePath(StringRef Path); + + void enqueueTask(std::function<void()> Task); + bool run(); + + // Driver is the owner of all opened files. + // InputFiles have MemoryBufferRefs to them. + std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; + + std::list<std::function<void()>> TaskQueue; + std::vector<StringRef> FilePaths; + std::vector<MemoryBufferRef> Resources; +}; + +void parseModuleDefs(MemoryBufferRef MB); +void writeImportLibrary(); + +// Functions below this line are defined in DriverUtils.cpp. + +void printHelp(const char *Argv0); + +// For /machine option. +MachineTypes getMachineType(StringRef Arg); +StringRef machineToStr(MachineTypes MT); + +// Parses a string in the form of "<integer>[,<integer>]". +void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size = nullptr); + +// Parses a string in the form of "<integer>[.<integer>]". +// Minor's default value is 0. +void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor); + +// Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]". +void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major, + uint32_t *Minor); + +void parseAlternateName(StringRef); +void parseMerge(StringRef); +void parseSection(StringRef); + +// Parses a string in the form of "EMBED[,=<integer>]|NO". +void parseManifest(StringRef Arg); + +// Parses a string in the form of "level=<string>|uiAccess=<string>" +void parseManifestUAC(StringRef Arg); + +// Create a resource file containing a manifest XML. +std::unique_ptr<MemoryBuffer> createManifestRes(); +void createSideBySideManifest(); + +// Used for dllexported symbols. 
+Export parseExport(StringRef Arg); +void fixupExports(); +void assignExportOrdinals(); + +// Parses a string in the form of "key=value" and check +// if value matches previous values for the key. +// This feature used in the directive section to reject +// incompatible objects. +void checkFailIfMismatch(StringRef Arg); + +// Convert Windows resource files (.res files) to a .obj file +// using cvtres.exe. +std::unique_ptr<MemoryBuffer> +convertResToCOFF(const std::vector<MemoryBufferRef> &MBs); + +// Create enum with OPT_xxx values for each option in Options.td +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +} // namespace coff +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp new file mode 100644 index 000000000000..14dd004f1c04 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp @@ -0,0 +1,703 @@ +//===- DriverUtils.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains utility functions for the driver. Because there +// are so many small functions, we created this separate file to make +// Driver.cpp less cluttered. 
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "Memory.h" +#include "Symbols.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Object/COFF.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> + +using namespace llvm::COFF; +using namespace llvm; +using llvm::cl::ExpandResponseFiles; +using llvm::cl::TokenizeWindowsCommandLine; +using llvm::sys::Process; + +namespace lld { +namespace coff { +namespace { + +class Executor { +public: + explicit Executor(StringRef S) : Saver(Alloc), Prog(Saver.save(S)) {} + void add(StringRef S) { Args.push_back(Saver.save(S).data()); } + void add(std::string &S) { Args.push_back(Saver.save(S).data()); } + void add(Twine S) { Args.push_back(Saver.save(S).data()); } + void add(const char *S) { Args.push_back(Saver.save(S).data()); } + + void run() { + ErrorOr<std::string> ExeOrErr = sys::findProgramByName(Prog); + if (auto EC = ExeOrErr.getError()) + fatal(EC, "unable to find " + Prog + " in PATH: "); + const char *Exe = Saver.save(*ExeOrErr).data(); + Args.insert(Args.begin(), Exe); + Args.push_back(nullptr); + if (sys::ExecuteAndWait(Args[0], Args.data()) != 0) { + for (const char *S : Args) + if (S) + errs() << S << " "; + fatal("ExecuteAndWait failed"); + } + } + +private: + BumpPtrAllocator Alloc; + StringSaver Saver; + StringRef Prog; + std::vector<const char *> Args; +}; + +} // anonymous namespace + +// Returns /machine's value. 
+MachineTypes getMachineType(StringRef S) { + MachineTypes MT = StringSwitch<MachineTypes>(S.lower()) + .Cases("x64", "amd64", AMD64) + .Cases("x86", "i386", I386) + .Case("arm", ARMNT) + .Default(IMAGE_FILE_MACHINE_UNKNOWN); + if (MT != IMAGE_FILE_MACHINE_UNKNOWN) + return MT; + fatal("unknown /machine argument: " + S); +} + +StringRef machineToStr(MachineTypes MT) { + switch (MT) { + case ARMNT: + return "arm"; + case AMD64: + return "x64"; + case I386: + return "x86"; + default: + llvm_unreachable("unknown machine type"); + } +} + +// Parses a string in the form of "<integer>[,<integer>]". +void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size) { + StringRef S1, S2; + std::tie(S1, S2) = Arg.split(','); + if (S1.getAsInteger(0, *Addr)) + fatal("invalid number: " + S1); + if (Size && !S2.empty() && S2.getAsInteger(0, *Size)) + fatal("invalid number: " + S2); +} + +// Parses a string in the form of "<integer>[.<integer>]". +// If second number is not present, Minor is set to 0. +void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor) { + StringRef S1, S2; + std::tie(S1, S2) = Arg.split('.'); + if (S1.getAsInteger(0, *Major)) + fatal("invalid number: " + S1); + *Minor = 0; + if (!S2.empty() && S2.getAsInteger(0, *Minor)) + fatal("invalid number: " + S2); +} + +// Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]". 
+void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major, + uint32_t *Minor) { + StringRef SysStr, Ver; + std::tie(SysStr, Ver) = Arg.split(','); + *Sys = StringSwitch<WindowsSubsystem>(SysStr.lower()) + .Case("boot_application", IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION) + .Case("console", IMAGE_SUBSYSTEM_WINDOWS_CUI) + .Case("efi_application", IMAGE_SUBSYSTEM_EFI_APPLICATION) + .Case("efi_boot_service_driver", IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER) + .Case("efi_rom", IMAGE_SUBSYSTEM_EFI_ROM) + .Case("efi_runtime_driver", IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER) + .Case("native", IMAGE_SUBSYSTEM_NATIVE) + .Case("posix", IMAGE_SUBSYSTEM_POSIX_CUI) + .Case("windows", IMAGE_SUBSYSTEM_WINDOWS_GUI) + .Default(IMAGE_SUBSYSTEM_UNKNOWN); + if (*Sys == IMAGE_SUBSYSTEM_UNKNOWN) + fatal("unknown subsystem: " + SysStr); + if (!Ver.empty()) + parseVersion(Ver, Major, Minor); +} + +// Parse a string of the form of "<from>=<to>". +// Results are directly written to Config. +void parseAlternateName(StringRef S) { + StringRef From, To; + std::tie(From, To) = S.split('='); + if (From.empty() || To.empty()) + fatal("/alternatename: invalid argument: " + S); + auto It = Config->AlternateNames.find(From); + if (It != Config->AlternateNames.end() && It->second != To) + fatal("/alternatename: conflicts: " + S); + Config->AlternateNames.insert(It, std::make_pair(From, To)); +} + +// Parse a string of the form of "<from>=<to>". +// Results are directly written to Config. 
+void parseMerge(StringRef S) { + StringRef From, To; + std::tie(From, To) = S.split('='); + if (From.empty() || To.empty()) + fatal("/merge: invalid argument: " + S); + auto Pair = Config->Merge.insert(std::make_pair(From, To)); + bool Inserted = Pair.second; + if (!Inserted) { + StringRef Existing = Pair.first->second; + if (Existing != To) + errs() << "warning: " << S << ": already merged into " << Existing + << "\n"; + } +} + +static uint32_t parseSectionAttributes(StringRef S) { + uint32_t Ret = 0; + for (char C : S.lower()) { + switch (C) { + case 'd': + Ret |= IMAGE_SCN_MEM_DISCARDABLE; + break; + case 'e': + Ret |= IMAGE_SCN_MEM_EXECUTE; + break; + case 'k': + Ret |= IMAGE_SCN_MEM_NOT_CACHED; + break; + case 'p': + Ret |= IMAGE_SCN_MEM_NOT_PAGED; + break; + case 'r': + Ret |= IMAGE_SCN_MEM_READ; + break; + case 's': + Ret |= IMAGE_SCN_MEM_SHARED; + break; + case 'w': + Ret |= IMAGE_SCN_MEM_WRITE; + break; + default: + fatal("/section: invalid argument: " + S); + } + } + return Ret; +} + +// Parses /section option argument. +void parseSection(StringRef S) { + StringRef Name, Attrs; + std::tie(Name, Attrs) = S.split(','); + if (Name.empty() || Attrs.empty()) + fatal("/section: invalid argument: " + S); + Config->Section[Name] = parseSectionAttributes(Attrs); +} + +// Parses a string in the form of "EMBED[,=<integer>]|NO". +// Results are directly written to Config. +void parseManifest(StringRef Arg) { + if (Arg.equals_lower("no")) { + Config->Manifest = Configuration::No; + return; + } + if (!Arg.startswith_lower("embed")) + fatal("invalid option " + Arg); + Config->Manifest = Configuration::Embed; + Arg = Arg.substr(strlen("embed")); + if (Arg.empty()) + return; + if (!Arg.startswith_lower(",id=")) + fatal("invalid option " + Arg); + Arg = Arg.substr(strlen(",id=")); + if (Arg.getAsInteger(0, Config->ManifestID)) + fatal("invalid option " + Arg); +} + +// Parses a string in the form of "level=<string>|uiAccess=<string>|NO". 
+// Results are directly written to Config. +void parseManifestUAC(StringRef Arg) { + if (Arg.equals_lower("no")) { + Config->ManifestUAC = false; + return; + } + for (;;) { + Arg = Arg.ltrim(); + if (Arg.empty()) + return; + if (Arg.startswith_lower("level=")) { + Arg = Arg.substr(strlen("level=")); + std::tie(Config->ManifestLevel, Arg) = Arg.split(" "); + continue; + } + if (Arg.startswith_lower("uiaccess=")) { + Arg = Arg.substr(strlen("uiaccess=")); + std::tie(Config->ManifestUIAccess, Arg) = Arg.split(" "); + continue; + } + fatal("invalid option " + Arg); + } +} + +// Quote each line with "". Existing double-quote is converted +// to two double-quotes. +static void quoteAndPrint(raw_ostream &Out, StringRef S) { + while (!S.empty()) { + StringRef Line; + std::tie(Line, S) = S.split("\n"); + if (Line.empty()) + continue; + Out << '\"'; + for (int I = 0, E = Line.size(); I != E; ++I) { + if (Line[I] == '\"') { + Out << "\"\""; + } else { + Out << Line[I]; + } + } + Out << "\"\n"; + } +} + +// An RAII temporary file class that automatically removes a temporary file. +namespace { +class TemporaryFile { +public: + TemporaryFile(StringRef Prefix, StringRef Extn) { + SmallString<128> S; + if (auto EC = sys::fs::createTemporaryFile("lld-" + Prefix, Extn, S)) + fatal(EC, "cannot create a temporary file"); + Path = S.str(); + } + + TemporaryFile(TemporaryFile &&Obj) { + std::swap(Path, Obj.Path); + } + + ~TemporaryFile() { + if (Path.empty()) + return; + if (sys::fs::remove(Path)) + fatal("failed to remove " + Path); + } + + // Returns a memory buffer of this temporary file. + // Note that this function does not leave the file open, + // so it is safe to remove the file immediately after this function + // is called (you cannot remove an opened file on Windows.) + std::unique_ptr<MemoryBuffer> getMemoryBuffer() { + // IsVolatileSize=true forces MemoryBuffer to not use mmap(). 
+ return check(MemoryBuffer::getFile(Path, /*FileSize=*/-1, + /*RequiresNullTerminator=*/false, + /*IsVolatileSize=*/true), + "could not open " + Path); + } + + std::string Path; +}; +} + +// Create the default manifest file as a temporary file. +TemporaryFile createDefaultXml() { + // Create a temporary file. + TemporaryFile File("defaultxml", "manifest"); + + // Open the temporary file for writing. + std::error_code EC; + raw_fd_ostream OS(File.Path, EC, sys::fs::F_Text); + if (EC) + fatal(EC, "failed to open " + File.Path); + + // Emit the XML. Note that we do *not* verify that the XML attributes are + // syntactically correct. This is intentional for link.exe compatibility. + OS << "<?xml version=\"1.0\" standalone=\"yes\"?>\n" + << "<assembly xmlns=\"urn:schemas-microsoft-com:asm.v1\"\n" + << " manifestVersion=\"1.0\">\n"; + if (Config->ManifestUAC) { + OS << " <trustInfo>\n" + << " <security>\n" + << " <requestedPrivileges>\n" + << " <requestedExecutionLevel level=" << Config->ManifestLevel + << " uiAccess=" << Config->ManifestUIAccess << "/>\n" + << " </requestedPrivileges>\n" + << " </security>\n" + << " </trustInfo>\n"; + if (!Config->ManifestDependency.empty()) { + OS << " <dependency>\n" + << " <dependentAssembly>\n" + << " <assemblyIdentity " << Config->ManifestDependency << " />\n" + << " </dependentAssembly>\n" + << " </dependency>\n"; + } + } + OS << "</assembly>\n"; + OS.close(); + return File; +} + +static std::string readFile(StringRef Path) { + std::unique_ptr<MemoryBuffer> MB = + check(MemoryBuffer::getFile(Path), "could not open " + Path); + return MB->getBuffer(); +} + +static std::string createManifestXml() { + // Create the default manifest file. + TemporaryFile File1 = createDefaultXml(); + if (Config->ManifestInput.empty()) + return readFile(File1.Path); + + // If manifest files are supplied by the user using /MANIFESTINPUT + // option, we need to merge them with the default manifest. 
+ TemporaryFile File2("user", "manifest"); + + Executor E("mt.exe"); + E.add("/manifest"); + E.add(File1.Path); + for (StringRef Filename : Config->ManifestInput) { + E.add("/manifest"); + E.add(Filename); + } + E.add("/nologo"); + E.add("/out:" + StringRef(File2.Path)); + E.run(); + return readFile(File2.Path); +} + +// Create a resource file containing a manifest XML. +std::unique_ptr<MemoryBuffer> createManifestRes() { + // Create a temporary file for the resource script file. + TemporaryFile RCFile("manifest", "rc"); + + // Open the temporary file for writing. + std::error_code EC; + raw_fd_ostream Out(RCFile.Path, EC, sys::fs::F_Text); + if (EC) + fatal(EC, "failed to open " + RCFile.Path); + + // Write resource script to the RC file. + Out << "#define LANG_ENGLISH 9\n" + << "#define SUBLANG_DEFAULT 1\n" + << "#define APP_MANIFEST " << Config->ManifestID << "\n" + << "#define RT_MANIFEST 24\n" + << "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n" + << "APP_MANIFEST RT_MANIFEST {\n"; + quoteAndPrint(Out, createManifestXml()); + Out << "}\n"; + Out.close(); + + // Create output resource file. + TemporaryFile ResFile("output-resource", "res"); + + Executor E("rc.exe"); + E.add("/fo"); + E.add(ResFile.Path); + E.add("/nologo"); + E.add(RCFile.Path); + E.run(); + return ResFile.getMemoryBuffer(); +} + +void createSideBySideManifest() { + std::string Path = Config->ManifestFile; + if (Path == "") + Path = Config->OutputFile + ".manifest"; + std::error_code EC; + raw_fd_ostream Out(Path, EC, sys::fs::F_Text); + if (EC) + fatal(EC, "failed to create manifest"); + Out << createManifestXml(); +} + +// Parse a string in the form of +// "<name>[=<internalname>][,@ordinal[,NONAME]][,DATA][,PRIVATE]" +// or "<name>=<dllname>.<name>". +// Used for parsing /export arguments. 
+Export parseExport(StringRef Arg) { + Export E; + StringRef Rest; + std::tie(E.Name, Rest) = Arg.split(","); + if (E.Name.empty()) + goto err; + + if (E.Name.find('=') != StringRef::npos) { + StringRef X, Y; + std::tie(X, Y) = E.Name.split("="); + + // If "<name>=<dllname>.<name>". + if (Y.find(".") != StringRef::npos) { + E.Name = X; + E.ForwardTo = Y; + return E; + } + + E.ExtName = X; + E.Name = Y; + if (E.Name.empty()) + goto err; + } + + // If "<name>=<internalname>[,@ordinal[,NONAME]][,DATA][,PRIVATE]" + while (!Rest.empty()) { + StringRef Tok; + std::tie(Tok, Rest) = Rest.split(","); + if (Tok.equals_lower("noname")) { + if (E.Ordinal == 0) + goto err; + E.Noname = true; + continue; + } + if (Tok.equals_lower("data")) { + E.Data = true; + continue; + } + if (Tok.equals_lower("private")) { + E.Private = true; + continue; + } + if (Tok.startswith("@")) { + int32_t Ord; + if (Tok.substr(1).getAsInteger(0, Ord)) + goto err; + if (Ord <= 0 || 65535 < Ord) + goto err; + E.Ordinal = Ord; + continue; + } + goto err; + } + return E; + +err: + fatal("invalid /export: " + Arg); +} + +static StringRef undecorate(StringRef Sym) { + if (Config->Machine != I386) + return Sym; + return Sym.startswith("_") ? Sym.substr(1) : Sym; +} + +// Performs error checking on all /export arguments. +// It also sets ordinals. +void fixupExports() { + // Symbol ordinals must be unique. + std::set<uint16_t> Ords; + for (Export &E : Config->Exports) { + if (E.Ordinal == 0) + continue; + if (!Ords.insert(E.Ordinal).second) + fatal("duplicate export ordinal: " + E.Name); + } + + for (Export &E : Config->Exports) { + SymbolBody *Sym = E.Sym; + if (!E.ForwardTo.empty()) { + E.SymbolName = E.Name; + } else { + if (auto *U = dyn_cast<Undefined>(Sym)) + if (U->WeakAlias) + Sym = U->WeakAlias; + E.SymbolName = Sym->getName(); + } + } + + for (Export &E : Config->Exports) { + if (!E.ForwardTo.empty()) { + E.ExportName = undecorate(E.Name); + } else { + E.ExportName = undecorate(E.ExtName.empty() ? 
E.Name : E.ExtName); + } + } + + // Uniquefy by name. + std::map<StringRef, Export *> Map; + std::vector<Export> V; + for (Export &E : Config->Exports) { + auto Pair = Map.insert(std::make_pair(E.ExportName, &E)); + bool Inserted = Pair.second; + if (Inserted) { + V.push_back(E); + continue; + } + Export *Existing = Pair.first->second; + if (E == *Existing || E.Name != Existing->Name) + continue; + errs() << "warning: duplicate /export option: " << E.Name << "\n"; + } + Config->Exports = std::move(V); + + // Sort by name. + std::sort(Config->Exports.begin(), Config->Exports.end(), + [](const Export &A, const Export &B) { + return A.ExportName < B.ExportName; + }); +} + +void assignExportOrdinals() { + // Assign unique ordinals if default (= 0). + uint16_t Max = 0; + for (Export &E : Config->Exports) + Max = std::max(Max, E.Ordinal); + for (Export &E : Config->Exports) + if (E.Ordinal == 0) + E.Ordinal = ++Max; +} + +// Parses a string in the form of "key=value" and check +// if value matches previous values for the same key. +void checkFailIfMismatch(StringRef Arg) { + StringRef K, V; + std::tie(K, V) = Arg.split('='); + if (K.empty() || V.empty()) + fatal("/failifmismatch: invalid argument: " + Arg); + StringRef Existing = Config->MustMatch[K]; + if (!Existing.empty() && V != Existing) + fatal("/failifmismatch: mismatch detected: " + Existing + " and " + V + + " for key " + K); + Config->MustMatch[K] = V; +} + +// Convert Windows resource files (.res files) to a .obj file +// using cvtres.exe. +std::unique_ptr<MemoryBuffer> +convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) { + // Create an output file path. + TemporaryFile File("resource-file", "obj"); + + // Execute cvtres.exe. 
+ Executor E("cvtres.exe"); + E.add("/machine:" + machineToStr(Config->Machine)); + E.add("/readonly"); + E.add("/nologo"); + E.add("/out:" + Twine(File.Path)); + + // We must create new files because the memory buffers we have may have no + // underlying file still existing on the disk. + // It happens if it was created from a TemporaryFile, which usually delete + // the file just after creating the MemoryBuffer. + std::vector<TemporaryFile> ResFiles; + ResFiles.reserve(MBs.size()); + for (MemoryBufferRef MB : MBs) { + // We store the temporary file in a vector to avoid deletion + // before running cvtres + ResFiles.emplace_back("resource-file", "res"); + TemporaryFile& ResFile = ResFiles.back(); + // Write the content of the resource in a temporary file + std::error_code EC; + raw_fd_ostream OS(ResFile.Path, EC, sys::fs::F_None); + if (EC) + fatal(EC, "failed to open " + ResFile.Path); + OS << MB.getBuffer(); + OS.close(); + + E.add(ResFile.Path); + } + + E.run(); + return File.getMemoryBuffer(); +} + +// Create OptTable + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ + { \ + X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, X8, X7, \ + OPT_##GROUP, OPT_##ALIAS, X6 \ + }, +#include "Options.inc" +#undef OPTION +}; + +class COFFOptTable : public llvm::opt::OptTable { +public: + COFFOptTable() : OptTable(infoTable, true) {} +}; + +// Parses a given list of options. +opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) { + // First, replace respnose files (@<file>-style options). + std::vector<const char *> Argv = replaceResponseFiles(ArgsArr); + + // Make InputArgList from string vectors. 
+ COFFOptTable Table; + unsigned MissingIndex; + unsigned MissingCount; + opt::InputArgList Args = Table.ParseArgs(Argv, MissingIndex, MissingCount); + + // Print the real command line if response files are expanded. + if (Args.hasArg(OPT_verbose) && ArgsArr.size() != Argv.size()) { + outs() << "Command line:"; + for (const char *S : Argv) + outs() << " " << S; + outs() << "\n"; + } + + if (MissingCount) + fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; + return Args; +} + +// link.exe has an interesting feature. If LINK environment exists, +// its contents are handled as a command line string. So you can pass +// extra arguments using the environment variable. +opt::InputArgList ArgParser::parseLINK(ArrayRef<const char *> Args) { + // Concatenate LINK env and command line arguments, and then parse them. + Optional<std::string> Env = Process::GetEnv("LINK"); + if (!Env) + return parse(Args); + std::vector<const char *> V = tokenize(*Env); + V.insert(V.end(), Args.begin(), Args.end()); + return parse(V); +} + +std::vector<const char *> ArgParser::tokenize(StringRef S) { + SmallVector<const char *, 16> Tokens; + cl::TokenizeWindowsCommandLine(S, Saver, Tokens); + return std::vector<const char *>(Tokens.begin(), Tokens.end()); +} + +// Creates a new command line by replacing options starting with '@' +// character. '@<filename>' is replaced by the file's contents. 
+std::vector<const char *> +ArgParser::replaceResponseFiles(std::vector<const char *> Argv) { + SmallVector<const char *, 256> Tokens(Argv.data(), Argv.data() + Argv.size()); + ExpandResponseFiles(Saver, TokenizeWindowsCommandLine, Tokens); + return std::vector<const char *>(Tokens.begin(), Tokens.end()); +} + +void printHelp(const char *Argv0) { + COFFOptTable Table; + Table.PrintHelp(outs(), Argv0, "LLVM Linker", false); +} + +} // namespace coff +} // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Error.cpp b/contrib/llvm/tools/lld/COFF/Error.cpp new file mode 100644 index 000000000000..b2bd557413df --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Error.cpp @@ -0,0 +1,50 @@ +//===- Error.cpp ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Error.h" + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#endif + +using namespace llvm; + +namespace lld { +namespace coff { + +void fatal(const Twine &Msg) { + if (sys::Process::StandardErrHasColors()) { + errs().changeColor(raw_ostream::RED, /*bold=*/true); + errs() << "error: "; + errs().resetColor(); + } else { + errs() << "error: "; + } + errs() << Msg << "\n"; + + outs().flush(); + errs().flush(); + _exit(1); +} + +void fatal(std::error_code EC, const Twine &Msg) { + fatal(Msg + ": " + EC.message()); +} + +void fatal(llvm::Error &Err, const Twine &Msg) { + fatal(errorToErrorCode(std::move(Err)), Msg); +} + +} // namespace coff +} // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Error.h b/contrib/llvm/tools/lld/COFF/Error.h new file mode 100644 index 000000000000..47549327db2b --- /dev/null 
+++ b/contrib/llvm/tools/lld/COFF/Error.h @@ -0,0 +1,55 @@ +//===- Error.h --------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_ERROR_H +#define LLD_COFF_ERROR_H + +#include "lld/Core/LLVM.h" +#include "llvm/Support/Error.h" + +namespace lld { +namespace coff { + +LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix); +LLVM_ATTRIBUTE_NORETURN void fatal(llvm::Error &Err, const Twine &Prefix); + +template <class T> T check(ErrorOr<T> &&V, const Twine &Prefix) { + if (auto EC = V.getError()) + fatal(EC, Prefix); + return std::move(*V); +} + +template <class T> T check(Expected<T> E, const Twine &Prefix) { + if (llvm::Error Err = E.takeError()) + fatal(Err, Prefix); + return std::move(*E); +} + +template <class T> T check(ErrorOr<T> EO) { + if (!EO) + fatal(EO.getError().message()); + return std::move(*EO); +} + +template <class T> T check(Expected<T> E) { + if (!E) { + std::string Buf; + llvm::raw_string_ostream OS(Buf); + logAllUnhandledErrors(E.takeError(), OS, ""); + OS.flush(); + fatal(Buf); + } + return std::move(*E); +} + +} // namespace coff +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/COFF/ICF.cpp b/contrib/llvm/tools/lld/COFF/ICF.cpp new file mode 100644 index 000000000000..196fbe2610ea --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/ICF.cpp @@ -0,0 +1,256 @@ +//===- ICF.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//
//
// ICF is short for Identical Code Folding. That is a size optimization to
// identify and merge two or more read-only sections (typically functions)
// that happened to have the same contents. It usually reduces output size
// by a few percent.
//
// On Windows, ICF is enabled by default.
//
// See ELF/ICF.cpp for the details about the algorithm.
//
//===----------------------------------------------------------------------===//

#include "Chunks.h"
#include "Error.h"
#include "Symbols.h"
#include "lld/Core/Parallel.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <atomic>
#include <vector>

using namespace llvm;

namespace lld {
namespace coff {

// Implements the folding pass. Sections carry a "color"; sections that end
// up with the same color are treated as identical and merged by run().
class ICF {
public:
  // Runs the whole pass over the given chunks.
  void run(const std::vector<Chunk *> &V);

private:
  // Splits the range [Begin, End) of Chunks into groups of mutually equal
  // sections and gives each group a new color. Constant selects between
  // equalsConstant() and equalsVariable() as the comparison.
  void segregate(size_t Begin, size_t End, bool Constant);

  // Section comparisons used by segregate().
  bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
  bool equalsVariable(const SectionChunk *A, const SectionChunk *B);

  // Initial color (hash) of a section, and the test for whether a section
  // takes part in ICF at all.
  uint32_t getHash(SectionChunk *C);
  bool isEligible(SectionChunk *C);

  // Returns the index of the first chunk after Begin (and before End) whose
  // current color differs from that of Chunks[Begin], or End if none does.
  size_t findBoundary(size_t Begin, size_t End);

  // Calls Fn(Begin, End) for each run of same-colored chunks.
  void forEachColorRange(size_t Begin, size_t End,
                         std::function<void(size_t, size_t)> Fn);

  void forEachColor(std::function<void(size_t, size_t)> Fn);

  // Sections taking part in ICF, kept ordered so that equal colors are
  // adjacent.
  std::vector<SectionChunk *> Chunks;
  // Number of completed rounds. Color[Cnt % 2] is the slot being read and
  // Color[(Cnt + 1) % 2] the slot being written.
  int Cnt = 0;
  // Source of fresh color IDs.
  std::atomic<uint32_t> NextId = {1};
  // Set when a round split at least one group, i.e. another round is needed.
  std::atomic<bool> Repeat = {false};
};

// Returns a hash value for C, computed from its permissions, section name,
// relocation count, alignment, raw data size and checksum.
uint32_t ICF::getHash(SectionChunk *C) {
  return hash_combine(C->getPermissions(),
                      hash_value(C->SectionName),
                      C->NumRelocs,
                      C->getAlign(),
                      uint32_t(C->Header->SizeOfRawData),
                      C->Checksum);
}

// Returns true if section C is subject of ICF.
+bool ICF::isEligible(SectionChunk *C) { + bool Global = C->Sym && C->Sym->isExternal(); + bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; + return C->isCOMDAT() && C->isLive() && Global && !Writable; +} + +// Split a range into smaller ranges by recoloring sections +void ICF::segregate(size_t Begin, size_t End, bool Constant) { + while (Begin < End) { + // Divide [Begin, End) into two. Let Mid be the start index of the + // second group. + auto Bound = std::stable_partition( + Chunks.begin() + Begin + 1, Chunks.begin() + End, [&](SectionChunk *S) { + if (Constant) + return equalsConstant(Chunks[Begin], S); + return equalsVariable(Chunks[Begin], S); + }); + size_t Mid = Bound - Chunks.begin(); + + // Split [Begin, End) into [Begin, Mid) and [Mid, End). + uint32_t Id = NextId++; + for (size_t I = Begin; I < Mid; ++I) + Chunks[I]->Color[(Cnt + 1) % 2] = Id; + + // If we created a group, we need to iterate the main loop again. + if (Mid != End) + Repeat = true; + + Begin = Mid; + } +} + +// Compare "non-moving" part of two sections, namely everything +// except relocation targets. +bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { + if (A->NumRelocs != B->NumRelocs) + return false; + + // Compare relocations. + auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { + if (R1.Type != R2.Type || + R1.VirtualAddress != R2.VirtualAddress) { + return false; + } + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); + if (B1 == B2) + return true; + if (auto *D1 = dyn_cast<DefinedRegular>(B1)) + if (auto *D2 = dyn_cast<DefinedRegular>(B2)) + return D1->getValue() == D2->getValue() && + D1->getChunk()->Color[Cnt % 2] == D2->getChunk()->Color[Cnt % 2]; + return false; + }; + if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq)) + return false; + + // Compare section attributes and contents. 
+ return A->getPermissions() == B->getPermissions() && + A->SectionName == B->SectionName && + A->getAlign() == B->getAlign() && + A->Header->SizeOfRawData == B->Header->SizeOfRawData && + A->Checksum == B->Checksum && + A->getContents() == B->getContents(); +} + +// Compare "moving" part of two sections, namely relocation targets. +bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) { + // Compare relocations. + auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); + if (B1 == B2) + return true; + if (auto *D1 = dyn_cast<DefinedRegular>(B1)) + if (auto *D2 = dyn_cast<DefinedRegular>(B2)) + return D1->getChunk()->Color[Cnt % 2] == D2->getChunk()->Color[Cnt % 2]; + return false; + }; + return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq); +} + +size_t ICF::findBoundary(size_t Begin, size_t End) { + for (size_t I = Begin + 1; I < End; ++I) + if (Chunks[Begin]->Color[Cnt % 2] != Chunks[I]->Color[Cnt % 2]) + return I; + return End; +} + +void ICF::forEachColorRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn) { + if (Begin > 0) + Begin = findBoundary(Begin - 1, End); + + while (Begin < End) { + size_t Mid = findBoundary(Begin, Chunks.size()); + Fn(Begin, Mid); + Begin = Mid; + } +} + +// Call Fn on each color group. +void ICF::forEachColor(std::function<void(size_t, size_t)> Fn) { + // If the number of sections are too small to use threading, + // call Fn sequentially. + if (Chunks.size() < 1024) { + forEachColorRange(0, Chunks.size(), Fn); + return; + } + + // Split sections into 256 shards and call Fn in parallel. 
+ size_t NumShards = 256; + size_t Step = Chunks.size() / NumShards; + parallel_for(size_t(0), NumShards, [&](size_t I) { + forEachColorRange(I * Step, (I + 1) * Step, Fn); + }); + forEachColorRange(Step * NumShards, Chunks.size(), Fn); +} + +// Merge identical COMDAT sections. +// Two sections are considered the same if their section headers, +// contents and relocations are all the same. +void ICF::run(const std::vector<Chunk *> &Vec) { + // Collect only mergeable sections and group by hash value. + for (Chunk *C : Vec) { + auto *SC = dyn_cast<SectionChunk>(C); + if (!SC) + continue; + + if (isEligible(SC)) { + // Set MSB to 1 to avoid collisions with non-hash colors. + SC->Color[0] = getHash(SC) | (1 << 31); + Chunks.push_back(SC); + } else { + SC->Color[0] = NextId++; + } + } + + if (Chunks.empty()) + return; + + // From now on, sections in Chunks are ordered so that sections in + // the same group are consecutive in the vector. + std::stable_sort(Chunks.begin(), Chunks.end(), + [](SectionChunk *A, SectionChunk *B) { + return A->Color[0] < B->Color[0]; + }); + + // Compare static contents and assign unique IDs for each static content. + forEachColor([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); + ++Cnt; + + // Split groups by comparing relocations until convergence is obtained. + do { + Repeat = false; + forEachColor( + [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); + ++Cnt; + } while (Repeat); + + if (Config->Verbose) + outs() << "\nICF needed " << Cnt << " iterations\n"; + + // Merge sections in the same colors. + forEachColor([&](size_t Begin, size_t End) { + if (End - Begin == 1) + return; + + if (Config->Verbose) + outs() << "Selected " << Chunks[Begin]->getDebugName() << "\n"; + for (size_t I = Begin + 1; I < End; ++I) { + if (Config->Verbose) + outs() << " Removed " << Chunks[I]->getDebugName() << "\n"; + Chunks[Begin]->replace(Chunks[I]); + } + }); +} + +// Entry point to ICF. 
+void doICF(const std::vector<Chunk *> &Chunks) { ICF().run(Chunks); } + +} // namespace coff +} // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/InputFiles.cpp b/contrib/llvm/tools/lld/COFF/InputFiles.cpp new file mode 100644 index 000000000000..cde355cd3f34 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/InputFiles.cpp @@ -0,0 +1,397 @@ +//===- InputFiles.cpp -----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" +#include "Chunks.h" +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "Memory.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "llvm-c/lto.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/LTO/legacy/LTOModule.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Target/TargetOptions.h" +#include <cstring> +#include <system_error> +#include <utility> + +using namespace llvm; +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm::support::endian; + +using llvm::Triple; +using llvm::support::ulittle32_t; +using llvm::sys::fs::file_magic; +using llvm::sys::fs::identify_magic; + +namespace lld { +namespace coff { + +LLVMContext BitcodeFile::Context; + +ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + +void ArchiveFile::parse() { + // Parse a MemoryBufferRef as an archive file. 
+ File = check(Archive::create(MB), toString(this)); + + // Read the symbol table to construct Lazy objects. + for (const Archive::Symbol &Sym : File->symbols()) + Symtab->addLazy(this, Sym); +} + +// Returns a buffer pointing to a member file containing a given symbol. +void ArchiveFile::addMember(const Archive::Symbol *Sym) { + const Archive::Child &C = + check(Sym->getMember(), + "could not get the member for symbol " + Sym->getName()); + + // Return an empty buffer if we have already returned the same buffer. + if (!Seen.insert(C.getChildOffset()).second) + return; + + Driver->enqueueArchiveMember(C, Sym->getName(), getName()); +} + +void ObjectFile::parse() { + // Parse a memory buffer as a COFF file. + std::unique_ptr<Binary> Bin = check(createBinary(MB), toString(this)); + + if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) { + Bin.release(); + COFFObj.reset(Obj); + } else { + fatal(toString(this) + " is not a COFF file"); + } + + // Read section and symbol tables. + initializeChunks(); + initializeSymbols(); + initializeSEH(); +} + +void ObjectFile::initializeChunks() { + uint32_t NumSections = COFFObj->getNumberOfSections(); + Chunks.reserve(NumSections); + SparseChunks.resize(NumSections + 1); + for (uint32_t I = 1; I < NumSections + 1; ++I) { + const coff_section *Sec; + StringRef Name; + if (auto EC = COFFObj->getSection(I, Sec)) + fatal(EC, "getSection failed: #" + Twine(I)); + if (auto EC = COFFObj->getSectionName(Sec, Name)) + fatal(EC, "getSectionName failed: #" + Twine(I)); + if (Name == ".sxdata") { + SXData = Sec; + continue; + } + if (Name == ".drectve") { + ArrayRef<uint8_t> Data; + COFFObj->getSectionContents(Sec, Data); + Directives = std::string((const char *)Data.data(), Data.size()); + continue; + } + + // Object files may have DWARF debug info or MS CodeView debug info + // (or both). + // + // DWARF sections don't need any special handling from the perspective + // of the linker; they are just a data section containing relocations. 
+ // We can just link them to complete debug info. + // + // CodeView needs a linker support. We need to interpret and debug + // info, and then write it to a separate .pdb file. + + // Ignore debug info unless /debug is given. + if (!Config->Debug && Name.startswith(".debug")) + continue; + + // CodeView sections are stored to a different vector because they are + // not linked in the regular manner. + if (Name == ".debug" || Name.startswith(".debug$")) { + DebugChunks.push_back(new (Alloc) SectionChunk(this, Sec)); + continue; + } + + if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) + continue; + auto *C = new (Alloc) SectionChunk(this, Sec); + Chunks.push_back(C); + SparseChunks[I] = C; + } +} + +void ObjectFile::initializeSymbols() { + uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); + SymbolBodies.reserve(NumSymbols); + SparseSymbolBodies.resize(NumSymbols); + SmallVector<std::pair<SymbolBody *, uint32_t>, 8> WeakAliases; + int32_t LastSectionNumber = 0; + for (uint32_t I = 0; I < NumSymbols; ++I) { + // Get a COFFSymbolRef object. 
+ ErrorOr<COFFSymbolRef> SymOrErr = COFFObj->getSymbol(I); + if (!SymOrErr) + fatal(SymOrErr.getError(), "broken object file: " + toString(this)); + COFFSymbolRef Sym = *SymOrErr; + + const void *AuxP = nullptr; + if (Sym.getNumberOfAuxSymbols()) + AuxP = COFFObj->getSymbol(I + 1)->getRawPtr(); + bool IsFirst = (LastSectionNumber != Sym.getSectionNumber()); + + SymbolBody *Body = nullptr; + if (Sym.isUndefined()) { + Body = createUndefined(Sym); + } else if (Sym.isWeakExternal()) { + Body = createUndefined(Sym); + uint32_t TagIndex = + static_cast<const coff_aux_weak_external *>(AuxP)->TagIndex; + WeakAliases.emplace_back(Body, TagIndex); + } else { + Body = createDefined(Sym, AuxP, IsFirst); + } + if (Body) { + SymbolBodies.push_back(Body); + SparseSymbolBodies[I] = Body; + } + I += Sym.getNumberOfAuxSymbols(); + LastSectionNumber = Sym.getSectionNumber(); + } + for (auto WeakAlias : WeakAliases) { + auto *U = dyn_cast<Undefined>(WeakAlias.first); + if (!U) + continue; + // Report an error if two undefined symbols have different weak aliases. + if (U->WeakAlias && U->WeakAlias != SparseSymbolBodies[WeakAlias.second]) + Symtab->reportDuplicate(U->symbol(), this); + U->WeakAlias = SparseSymbolBodies[WeakAlias.second]; + } +} + +SymbolBody *ObjectFile::createUndefined(COFFSymbolRef Sym) { + StringRef Name; + COFFObj->getSymbolName(Sym, Name); + return Symtab->addUndefined(Name, this, Sym.isWeakExternal())->body(); +} + +SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, + bool IsFirst) { + StringRef Name; + if (Sym.isCommon()) { + auto *C = new (Alloc) CommonChunk(Sym); + Chunks.push_back(C); + return Symtab->addCommon(this, Sym, C)->body(); + } + if (Sym.isAbsolute()) { + COFFObj->getSymbolName(Sym, Name); + // Skip special symbols. + if (Name == "@comp.id") + return nullptr; + // COFF spec 5.10.1. The .sxdata section. 
+ if (Name == "@feat.00") { + if (Sym.getValue() & 1) + SEHCompat = true; + return nullptr; + } + if (Sym.isExternal()) + return Symtab->addAbsolute(Name, Sym)->body(); + else + return new (Alloc) DefinedAbsolute(Name, Sym); + } + int32_t SectionNumber = Sym.getSectionNumber(); + if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) + return nullptr; + + // Reserved sections numbers don't have contents. + if (llvm::COFF::isReservedSectionNumber(SectionNumber)) + fatal("broken object file: " + toString(this)); + + // This symbol references a section which is not present in the section + // header. + if ((uint32_t)SectionNumber >= SparseChunks.size()) + fatal("broken object file: " + toString(this)); + + // Nothing else to do without a section chunk. + auto *SC = cast_or_null<SectionChunk>(SparseChunks[SectionNumber]); + if (!SC) + return nullptr; + + // Handle section definitions + if (IsFirst && AuxP) { + auto *Aux = reinterpret_cast<const coff_aux_section_definition *>(AuxP); + if (Aux->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) + if (auto *ParentSC = cast_or_null<SectionChunk>( + SparseChunks[Aux->getNumber(Sym.isBigObj())])) + ParentSC->addAssociative(SC); + SC->Checksum = Aux->CheckSum; + } + + DefinedRegular *B; + if (Sym.isExternal()) + B = cast<DefinedRegular>(Symtab->addRegular(this, Sym, SC)->body()); + else + B = new (Alloc) DefinedRegular(this, Sym, SC); + if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP) + SC->setSymbol(B); + + return B; +} + +void ObjectFile::initializeSEH() { + if (!SEHCompat || !SXData) + return; + ArrayRef<uint8_t> A; + COFFObj->getSectionContents(SXData, A); + if (A.size() % 4 != 0) + fatal(".sxdata must be an array of symbol table indices"); + auto *I = reinterpret_cast<const ulittle32_t *>(A.data()); + auto *E = reinterpret_cast<const ulittle32_t *>(A.data() + A.size()); + for (; I != E; ++I) + SEHandlers.insert(SparseSymbolBodies[*I]); +} + +MachineTypes ObjectFile::getMachineType() { + if (COFFObj) + return 
static_cast<MachineTypes>(COFFObj->getMachine()); + return IMAGE_FILE_MACHINE_UNKNOWN; +} + +StringRef ltrim1(StringRef S, const char *Chars) { + if (!S.empty() && strchr(Chars, S[0])) + return S.substr(1); + return S; +} + +void ImportFile::parse() { + const char *Buf = MB.getBufferStart(); + const char *End = MB.getBufferEnd(); + const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf); + + // Check if the total size is valid. + if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData)) + fatal("broken import library"); + + // Read names and create an __imp_ symbol. + StringRef Name = StringAlloc.save(StringRef(Buf + sizeof(*Hdr))); + StringRef ImpName = StringAlloc.save("__imp_" + Name); + const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1; + DLLName = StringRef(NameStart); + StringRef ExtName; + switch (Hdr->getNameType()) { + case IMPORT_ORDINAL: + ExtName = ""; + break; + case IMPORT_NAME: + ExtName = Name; + break; + case IMPORT_NAME_NOPREFIX: + ExtName = ltrim1(Name, "?@_"); + break; + case IMPORT_NAME_UNDECORATE: + ExtName = ltrim1(Name, "?@_"); + ExtName = ExtName.substr(0, ExtName.find('@')); + break; + } + + this->Hdr = Hdr; + ExternalName = ExtName; + + ImpSym = cast<DefinedImportData>( + Symtab->addImportData(ImpName, this)->body()); + + // If type is function, we need to create a thunk which jump to an + // address pointed by the __imp_ symbol. (This allows you to call + // DLL functions just like regular non-DLL functions.) 
+ if (Hdr->getType() != llvm::COFF::IMPORT_CODE) + return; + ThunkSym = cast<DefinedImportThunk>( + Symtab->addImportThunk(Name, ImpSym, Hdr->Machine)->body()); +} + +void BitcodeFile::parse() { + Context.enableDebugTypeODRUniquing(); + ErrorOr<std::unique_ptr<LTOModule>> ModOrErr = LTOModule::createFromBuffer( + Context, MB.getBufferStart(), MB.getBufferSize(), llvm::TargetOptions()); + M = check(std::move(ModOrErr), "could not create LTO module"); + + StringSaver Saver(Alloc); + for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) { + lto_symbol_attributes Attrs = M->getSymbolAttributes(I); + if ((Attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL) + continue; + + StringRef SymName = Saver.save(M->getSymbolName(I)); + int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK; + if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) { + SymbolBodies.push_back(Symtab->addUndefined(SymName, this, false)->body()); + } else { + bool Replaceable = + (SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || // common + (Attrs & LTO_SYMBOL_COMDAT) || // comdat + (SymbolDef == LTO_SYMBOL_DEFINITION_WEAK && // weak external + (Attrs & LTO_SYMBOL_ALIAS))); + SymbolBodies.push_back( + Symtab->addBitcode(this, SymName, Replaceable)->body()); + } + } + + Directives = M->getLinkerOpts(); +} + +MachineTypes BitcodeFile::getMachineType() { + if (!M) + return IMAGE_FILE_MACHINE_UNKNOWN; + switch (Triple(M->getTargetTriple()).getArch()) { + case Triple::x86_64: + return AMD64; + case Triple::x86: + return I386; + case Triple::arm: + return ARMNT; + default: + return IMAGE_FILE_MACHINE_UNKNOWN; + } +} +} // namespace coff +} // namespace lld + +// Returns the last element of a path, which is supposed to be a filename. +static StringRef getBasename(StringRef Path) { + size_t Pos = Path.find_last_of("\\/"); + if (Pos == StringRef::npos) + return Path; + return Path.substr(Pos + 1); +} + +// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". 
+std::string lld::toString(coff::InputFile *File) { + if (!File) + return "(internal)"; + if (File->ParentName.empty()) + return File->getName().lower(); + + std::string Res = + (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + ")") + .str(); + return StringRef(Res).lower(); +} diff --git a/contrib/llvm/tools/lld/COFF/InputFiles.h b/contrib/llvm/tools/lld/COFF/InputFiles.h new file mode 100644 index 000000000000..1b5d42939cca --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/InputFiles.h @@ -0,0 +1,210 @@ +//===- InputFiles.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_INPUT_FILES_H +#define LLD_COFF_INPUT_FILES_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/LTO/legacy/LTOModule.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/StringSaver.h" +#include <memory> +#include <set> +#include <vector> + +namespace lld { +namespace coff { + +using llvm::LTOModule; +using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; +using llvm::COFF::MachineTypes; +using llvm::object::Archive; +using llvm::object::COFFObjectFile; +using llvm::object::COFFSymbolRef; +using llvm::object::coff_import_header; +using llvm::object::coff_section; + +class Chunk; +class Defined; +class DefinedImportData; +class DefinedImportThunk; +class Lazy; +class SectionChunk; +struct Symbol; +class SymbolBody; +class Undefined; + +// The root class of input files. +class InputFile { +public: + enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind }; + Kind kind() const { return FileKind; } + virtual ~InputFile() {} + + // Returns the filename. 
// .obj or .o file. This may be a member of an archive file.
class ObjectFile : public InputFile {
public:
  explicit ObjectFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {}
  // Supports isa<>/cast<>/dyn_cast<> on InputFile pointers.
  static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
  void parse() override;
  MachineTypes getMachineType() override;
  std::vector<Chunk *> &getChunks() { return Chunks; }
  std::vector<SectionChunk *> &getDebugChunks() { return DebugChunks; }
  std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; }

  // Returns a SymbolBody object for the SymbolIndex'th symbol in the
  // underlying object file. The index is not range-checked here, and the
  // result is null for slots that hold no symbol body (see
  // SparseSymbolBodies below).
  SymbolBody *getSymbolBody(uint32_t SymbolIndex) {
    return SparseSymbolBodies[SymbolIndex];
  }

  // Returns the underlying COFF file.
  COFFObjectFile *getCOFFObj() { return COFFObj.get(); }

  // True if this object file is compatible with SEH.
  // COFF-specific and x86-only.
  bool SEHCompat = false;

  // The list of safe exception handlers listed in .sxdata section.
  // COFF-specific and x86-only.
  std::set<SymbolBody *> SEHandlers;

private:
  void initializeChunks();
  void initializeSymbols();
  void initializeSEH();

  SymbolBody *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst);
  SymbolBody *createUndefined(COFFSymbolRef Sym);

  std::unique_ptr<COFFObjectFile> COFFObj;
  llvm::BumpPtrAllocator Alloc;
  // The .sxdata section header, or null if none has been recorded.
  const coff_section *SXData = nullptr;

  // List of all chunks defined by this file. This includes both section
  // chunks and non-section chunks for common symbols.
  std::vector<Chunk *> Chunks;

  // CodeView debug info sections.
  std::vector<SectionChunk *> DebugChunks;

  // This vector contains the same chunks as Chunks, but they are
  // indexed such that you can get a SectionChunk by section index.
  // Nonexistent section indices are filled with null pointers.
  // (Because section number is 1-based, the first slot is always a
  // null pointer.)
  std::vector<Chunk *> SparseChunks;

  // List of all symbols referenced or defined by this file.
  std::vector<SymbolBody *> SymbolBodies;

  // This vector contains the same symbols as SymbolBodies, but they
  // are indexed such that you can get a SymbolBody by symbol
  // index. Nonexistent indices (which are occupied by auxiliary
  // symbols in the real symbol table) are filled with null pointers.
  std::vector<SymbolBody *> SparseSymbolBodies;
};
+class ImportFile : public InputFile { +public: + explicit ImportFile(MemoryBufferRef M) + : InputFile(ImportKind, M), StringAlloc(StringAllocAux) {} + static bool classof(const InputFile *F) { return F->kind() == ImportKind; } + + DefinedImportData *ImpSym = nullptr; + DefinedImportThunk *ThunkSym = nullptr; + std::string DLLName; + +private: + void parse() override; + + llvm::BumpPtrAllocator Alloc; + llvm::BumpPtrAllocator StringAllocAux; + llvm::StringSaver StringAlloc; + +public: + StringRef ExternalName; + const coff_import_header *Hdr; + Chunk *Location = nullptr; +}; + +// Used for LTO. +class BitcodeFile : public InputFile { +public: + explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } + std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; } + MachineTypes getMachineType() override; + std::unique_ptr<LTOModule> takeModule() { return std::move(M); } + + static llvm::LLVMContext Context; + +private: + void parse() override; + + std::vector<SymbolBody *> SymbolBodies; + llvm::BumpPtrAllocator Alloc; + std::unique_ptr<LTOModule> M; +}; +} // namespace coff + +std::string toString(coff::InputFile *File); +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/COFF/Librarian.cpp b/contrib/llvm/tools/lld/COFF/Librarian.cpp new file mode 100644 index 000000000000..4c597fad7345 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Librarian.cpp @@ -0,0 +1,494 @@ +//===- Librarian.cpp ------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions for the Librarian. The librarian creates and +// manages libraries of the Common Object File Format (COFF) object files. 
// Appends the raw object representation of Data (exactly sizeof(T) bytes, in
// host memory order) to the end of B. T may be an array type, in which case
// the whole array is appended.
template <class T> static void append(std::vector<uint8_t> &B, const T &Data) {
  const auto *First = reinterpret_cast<const uint8_t *>(&Data);
  B.insert(B.end(), First, First + sizeof(T));
}
+ Pos += sizeof(uint32_t); + + for (const auto &S : Strings) { + B.resize(Pos + S.length() + 1); + strcpy(reinterpret_cast<char *>(&B[Pos]), S.c_str()); + Pos += S.length() + 1; + } + + // Backfill the length of the table now that it has been computed. + support::ulittle32_t Length(B.size() - Offset); + memcpy(&B[Offset], &Length, sizeof(Length)); +} + +static std::string getImplibPath() { + if (!Config->Implib.empty()) + return Config->Implib; + SmallString<128> Out = StringRef(Config->OutputFile); + sys::path::replace_extension(Out, ".lib"); + return Out.str(); +} + +static ImportNameType getNameType(StringRef Sym, StringRef ExtName) { + if (Sym != ExtName) + return IMPORT_NAME_UNDECORATE; + if (Config->Machine == I386 && Sym.startswith("_")) + return IMPORT_NAME_NOPREFIX; + return IMPORT_NAME; +} + +static std::string replace(StringRef S, StringRef From, StringRef To) { + size_t Pos = S.find(From); + assert(Pos != StringRef::npos); + return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); +} + +static const std::string NullImportDescriptorSymbolName = + "__NULL_IMPORT_DESCRIPTOR"; + +namespace { +// This class constructs various small object files necessary to support linking +// symbols imported from a DLL. The contents are pretty strictly defined and +// nearly entirely static. The details of the structures files are defined in +// WINNT.h and the PE/COFF specification. +class ObjectFactory { + using u16 = support::ulittle16_t; + using u32 = support::ulittle32_t; + + BumpPtrAllocator Alloc; + StringRef DLLName; + StringRef Library; + std::string ImportDescriptorSymbolName; + std::string NullThunkSymbolName; + +public: + ObjectFactory(StringRef S) + : DLLName(S), Library(S.drop_back(4)), + ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), + NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} + + // Creates an Import Descriptor. 
This is a small object file which contains a + // reference to the terminators and contains the library name (entry) for the + // import name table. It will force the linker to construct the necessary + // structure to import symbols from the DLL. + NewArchiveMember createImportDescriptor(std::vector<uint8_t> &Buffer); + + // Creates a NULL import descriptor. This is a small object file whcih + // contains a NULL import descriptor. It is used to terminate the imports + // from a specific DLL. + NewArchiveMember createNullImportDescriptor(std::vector<uint8_t> &Buffer); + + // Create a NULL Thunk Entry. This is a small object file which contains a + // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It + // is used to terminate the IAT and ILT. + NewArchiveMember createNullThunk(std::vector<uint8_t> &Buffer); + + // Create a short import file which is described in PE/COFF spec 7. Import + // Library Format. + NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, + ImportNameType NameType, bool isData); +}; +} + +NewArchiveMember +ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 7; + static const uint32_t NumberOfRelocations = 3; + + // COFF Header + coff_file_header Header{ + u16(Config->Machine), u16(NumberOfSections), u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$2 + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation) + + // .idata$4 + (DLLName.size() + 1)), + u32(NumberOfSymbols), u16(0), + u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry)), + u32(0), + u16(NumberOfRelocations), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, + u32(0), + u32(0), + u32(DLLName.size() + 1), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$2 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + static const coff_relocation RelocationTable[NumberOfRelocations] = { + {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), + u16(getImgRelRelocation())}, + {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), + u32(3), u16(getImgRelRelocation())}, + {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)), + u32(4), u16(getImgRelRelocation())}, + }; + append(Buffer, RelocationTable); + + // .idata$6 + auto S = Buffer.size(); + Buffer.resize(S + DLLName.size() + 1); + memcpy(&Buffer[S], DLLName.data(), DLLName.size()); + Buffer[S + DLLName.size()] = '\0'; + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + 
IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}}, + u32(0), + u16(2), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset = + sizeof(uint32_t); + reinterpret_cast<StringTableOffset &>(SymbolTable[5].Name).Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; + reinterpret_cast<StringTableOffset &>(SymbolTable[6].Name).Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + + NullImportDescriptorSymbolName.length() + 1; + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, + {ImportDescriptorSymbolName, NullImportDescriptorSymbolName, + NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember +ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) { + static const uint32_t NumberOfSections = 1; + static const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Config->Machine), u16(NumberOfSections), u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$3 + sizeof(coff_import_directory_table_entry)), + u32(NumberOfSymbols), u16(0), + u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + + (NumberOfSections * sizeof(coff_section))), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$3 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset = + sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullImportDescriptorSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 1; + uint32_t VASize = is32bit() ? 4 : 8; + + // COFF Header + coff_file_header Header{ + u16(Config->Machine), u16(NumberOfSections), u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$5 + VASize + + // .idata$4 + VASize), + u32(NumberOfSymbols), u16(0), + u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit() ? IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + VASize), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit() ? IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$5, ILT + append(Buffer, u32(0)); + if (!is32bit()) + append(Buffer, u32(0)); + + // .idata$4, IAT + append(Buffer, u32(0)); + if (!is32bit()) + append(Buffer, u32(0)); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset = + sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef{F, DLLName}}; +} + +NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, + uint16_t Ordinal, + ImportNameType NameType, + bool isData) { + size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs + size_t Size = sizeof(coff_import_header) + ImpSize; + char *Buf = Alloc.Allocate<char>(Size); + memset(Buf, 0, Size); + char *P = Buf; + + // Write short import library. 
+ auto *Imp = reinterpret_cast<coff_import_header *>(P); + P += sizeof(*Imp); + Imp->Sig2 = 0xFFFF; + Imp->Machine = Config->Machine; + Imp->SizeOfData = ImpSize; + if (Ordinal > 0) + Imp->OrdinalHint = Ordinal; + Imp->TypeInfo = (isData ? IMPORT_DATA : IMPORT_CODE); + Imp->TypeInfo |= NameType << 2; + + // Write symbol name and DLL name. + memcpy(P, Sym.data(), Sym.size()); + P += Sym.size() + 1; + memcpy(P, DLLName.data(), DLLName.size()); + + return {MemoryBufferRef(StringRef(Buf, Size), DLLName)}; +} + +// Creates an import library for a DLL. In this function, we first +// create an empty import library using lib.exe and then adds short +// import files to that file. +void lld::coff::writeImportLibrary() { + std::vector<NewArchiveMember> Members; + + std::string Path = getImplibPath(); + std::string DLLName = sys::path::filename(Config->OutputFile); + ObjectFactory OF(DLLName); + + std::vector<uint8_t> ImportDescriptor; + Members.push_back(OF.createImportDescriptor(ImportDescriptor)); + + std::vector<uint8_t> NullImportDescriptor; + Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor)); + + std::vector<uint8_t> NullThunk; + Members.push_back(OF.createNullThunk(NullThunk)); + + for (Export &E : Config->Exports) { + if (E.Private) + continue; + + ImportNameType Type = getNameType(E.SymbolName, E.Name); + std::string Name = E.ExtName.empty() + ? 
std::string(E.SymbolName) + : replace(E.SymbolName, E.Name, E.ExtName); + Members.push_back(OF.createShortImport(Name, E.Ordinal, Type, E.Data)); + } + + std::pair<StringRef, std::error_code> Result = + writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU, + /*Deterministic*/ true, /*Thin*/ false); + if (auto EC = Result.second) + fatal(EC, "failed to write " + Path); +} diff --git a/contrib/llvm/tools/lld/COFF/MarkLive.cpp b/contrib/llvm/tools/lld/COFF/MarkLive.cpp new file mode 100644 index 000000000000..0156d238b672 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/MarkLive.cpp @@ -0,0 +1,61 @@ +//===- MarkLive.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "Symbols.h" +#include "llvm/ADT/STLExtras.h" +#include <vector> + +namespace lld { +namespace coff { + +// Set live bit on for each reachable chunk. Unmarked (unreachable) +// COMDAT chunks will be ignored by Writer, so they will be excluded +// from the final output. +void markLive(const std::vector<Chunk *> &Chunks) { + // We build up a worklist of sections which have been marked as live. We only + // push into the worklist when we discover an unmarked section, and we mark + // as we push, so sections never appear twice in the list. + SmallVector<SectionChunk *, 256> Worklist; + + // COMDAT section chunks are dead by default. Add non-COMDAT chunks. + for (Chunk *C : Chunks) + if (auto *SC = dyn_cast<SectionChunk>(C)) + if (SC->isLive()) + Worklist.push_back(SC); + + auto Enqueue = [&](SectionChunk *C) { + if (C->isLive()) + return; + C->markLive(); + Worklist.push_back(C); + }; + + // Add GC root chunks. 
+ for (SymbolBody *B : Config->GCRoot) + if (auto *D = dyn_cast<DefinedRegular>(B)) + Enqueue(D->getChunk()); + + while (!Worklist.empty()) { + SectionChunk *SC = Worklist.pop_back_val(); + assert(SC->isLive() && "We mark as live when pushing onto the worklist!"); + + // Mark all symbols listed in the relocation table for this section. + for (SymbolBody *S : SC->symbols()) + if (auto *D = dyn_cast<DefinedRegular>(S)) + Enqueue(D->getChunk()); + + // Mark associative sections if any. + for (SectionChunk *C : SC->children()) + Enqueue(C); + } +} + +} +} diff --git a/contrib/llvm/tools/lld/COFF/Memory.h b/contrib/llvm/tools/lld/COFF/Memory.h new file mode 100644 index 000000000000..526f11344a09 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Memory.h @@ -0,0 +1,52 @@ +//===- Memory.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// See ELF/Memory.h +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_MEMORY_H +#define LLD_COFF_MEMORY_H + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" +#include <vector> + +namespace lld { +namespace coff { + +extern llvm::BumpPtrAllocator BAlloc; +extern llvm::StringSaver Saver; + +struct SpecificAllocBase { + SpecificAllocBase() { Instances.push_back(this); } + virtual ~SpecificAllocBase() = default; + virtual void reset() = 0; + static std::vector<SpecificAllocBase *> Instances; +}; + +template <class T> struct SpecificAlloc : public SpecificAllocBase { + void reset() override { Alloc.DestroyAll(); } + llvm::SpecificBumpPtrAllocator<T> Alloc; +}; + +template <typename T, typename... U> T *make(U &&... 
Args) { + static SpecificAlloc<T> Alloc; + return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...); +} + +inline void freeArena() { + for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances) + Alloc->reset(); + BAlloc.Reset(); +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/COFF/ModuleDef.cpp b/contrib/llvm/tools/lld/COFF/ModuleDef.cpp new file mode 100644 index 000000000000..a273b6f535db --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/ModuleDef.cpp @@ -0,0 +1,289 @@ +//===- COFF/ModuleDef.cpp -------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Windows-specific. +// A parser for the module-definition file (.def file). +// Parsed results are directly written to Config global variable. +// +// The format of module-definition files are described in this document: +// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx +// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Error.h" +#include "Memory.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" +#include <system_error> + +using namespace llvm; + +namespace lld { +namespace coff { +namespace { + +enum Kind { + Unknown, + Eof, + Identifier, + Comma, + Equal, + KwBase, + KwData, + KwExports, + KwHeapsize, + KwLibrary, + KwName, + KwNoname, + KwPrivate, + KwStacksize, + KwVersion, +}; + +struct Token { + explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} + Kind K; + StringRef Value; +}; + +static bool isDecorated(StringRef Sym) { + return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?"); +} + +class Lexer { +public: + explicit Lexer(StringRef S) : Buf(S) {} + + 
Token lex() { + Buf = Buf.trim(); + if (Buf.empty()) + return Token(Eof); + + switch (Buf[0]) { + case '\0': + return Token(Eof); + case ';': { + size_t End = Buf.find('\n'); + Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); + return lex(); + } + case '=': + Buf = Buf.drop_front(); + return Token(Equal, "="); + case ',': + Buf = Buf.drop_front(); + return Token(Comma, ","); + case '"': { + StringRef S; + std::tie(S, Buf) = Buf.substr(1).split('"'); + return Token(Identifier, S); + } + default: { + size_t End = Buf.find_first_of("=,\r\n \t\v"); + StringRef Word = Buf.substr(0, End); + Kind K = llvm::StringSwitch<Kind>(Word) + .Case("BASE", KwBase) + .Case("DATA", KwData) + .Case("EXPORTS", KwExports) + .Case("HEAPSIZE", KwHeapsize) + .Case("LIBRARY", KwLibrary) + .Case("NAME", KwName) + .Case("NONAME", KwNoname) + .Case("PRIVATE", KwPrivate) + .Case("STACKSIZE", KwStacksize) + .Case("VERSION", KwVersion) + .Default(Identifier); + Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); + return Token(K, Word); + } + } + } + +private: + StringRef Buf; +}; + +class Parser { +public: + explicit Parser(StringRef S) : Lex(S) {} + + void parse() { + do { + parseOne(); + } while (Tok.K != Eof); + } + +private: + void read() { + if (Stack.empty()) { + Tok = Lex.lex(); + return; + } + Tok = Stack.back(); + Stack.pop_back(); + } + + void readAsInt(uint64_t *I) { + read(); + if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) + fatal("integer expected"); + } + + void expect(Kind Expected, StringRef Msg) { + read(); + if (Tok.K != Expected) + fatal(Msg); + } + + void unget() { Stack.push_back(Tok); } + + void parseOne() { + read(); + switch (Tok.K) { + case Eof: + return; + case KwExports: + for (;;) { + read(); + if (Tok.K != Identifier) { + unget(); + return; + } + parseExport(); + } + case KwHeapsize: + parseNumbers(&Config->HeapReserve, &Config->HeapCommit); + return; + case KwLibrary: + parseName(&Config->OutputFile, &Config->ImageBase); + if 
(!StringRef(Config->OutputFile).endswith_lower(".dll")) + Config->OutputFile += ".dll"; + return; + case KwStacksize: + parseNumbers(&Config->StackReserve, &Config->StackCommit); + return; + case KwName: + parseName(&Config->OutputFile, &Config->ImageBase); + return; + case KwVersion: + parseVersion(&Config->MajorImageVersion, &Config->MinorImageVersion); + return; + default: + fatal("unknown directive: " + Tok.Value); + } + } + + void parseExport() { + Export E; + E.Name = Tok.Value; + read(); + if (Tok.K == Equal) { + read(); + if (Tok.K != Identifier) + fatal("identifier expected, but got " + Tok.Value); + E.ExtName = E.Name; + E.Name = Tok.Value; + } else { + unget(); + } + + if (Config->Machine == I386) { + if (!isDecorated(E.Name)) + E.Name = Saver.save("_" + E.Name); + if (!E.ExtName.empty() && !isDecorated(E.ExtName)) + E.ExtName = Saver.save("_" + E.ExtName); + } + + for (;;) { + read(); + if (Tok.K == Identifier && Tok.Value[0] == '@') { + Tok.Value.drop_front().getAsInteger(10, E.Ordinal); + read(); + if (Tok.K == KwNoname) { + E.Noname = true; + } else { + unget(); + } + continue; + } + if (Tok.K == KwData) { + E.Data = true; + continue; + } + if (Tok.K == KwPrivate) { + E.Private = true; + continue; + } + unget(); + Config->Exports.push_back(E); + return; + } + } + + // HEAPSIZE/STACKSIZE reserve[,commit] + void parseNumbers(uint64_t *Reserve, uint64_t *Commit) { + readAsInt(Reserve); + read(); + if (Tok.K != Comma) { + unget(); + Commit = nullptr; + return; + } + readAsInt(Commit); + } + + // NAME outputPath [BASE=address] + void parseName(std::string *Out, uint64_t *Baseaddr) { + read(); + if (Tok.K == Identifier) { + *Out = Tok.Value; + } else { + *Out = ""; + unget(); + return; + } + read(); + if (Tok.K == KwBase) { + expect(Equal, "'=' expected"); + readAsInt(Baseaddr); + } else { + unget(); + *Baseaddr = 0; + } + } + + // VERSION major[.minor] + void parseVersion(uint32_t *Major, uint32_t *Minor) { + read(); + if (Tok.K != Identifier) + 
fatal("identifier expected, but got " + Tok.Value); + StringRef V1, V2; + std::tie(V1, V2) = Tok.Value.split('.'); + if (V1.getAsInteger(10, *Major)) + fatal("integer expected, but got " + Tok.Value); + if (V2.empty()) + *Minor = 0; + else if (V2.getAsInteger(10, *Minor)) + fatal("integer expected, but got " + Tok.Value); + } + + Lexer Lex; + Token Tok; + std::vector<Token> Stack; +}; + +} // anonymous namespace + +void parseModuleDefs(MemoryBufferRef MB) { Parser(MB.getBuffer()).parse(); } + +} // namespace coff +} // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Options.td b/contrib/llvm/tools/lld/COFF/Options.td new file mode 100644 index 000000000000..9dfbcc8e188c --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Options.td @@ -0,0 +1,133 @@ +include "llvm/Option/OptParser.td" + +// link.exe accepts options starting with either a dash or a slash. + +// Flag that takes no arguments. +class F<string name> : Flag<["/", "-", "-?"], name>; + +// Flag that takes one argument after ":". +class P<string name, string help> : + Joined<["/", "-", "-?"], name#":">, HelpText<help>; + +// Boolean flag suffixed by ":no". +multiclass B<string name, string help> { + def "" : F<name>; + def _no : F<name#":no">, HelpText<help>; +} + +def align : P<"align", "Section alignment">; +def alternatename : P<"alternatename", "Define weak alias">; +def base : P<"base", "Base address of the program">; +def defaultlib : P<"defaultlib", "Add the library to the list of input files">; +def delayload : P<"delayload", "Delay loaded DLL name">; +def entry : P<"entry", "Name of entry point symbol">; +def export : P<"export", "Export a function">; +// No help text because /failifmismatch is not intended to be used by the user. 
def failifmismatch : P<"failifmismatch", "">;
def heap : P<"heap", "Size of the heap">;
def implib : P<"implib", "Import library name">;
def libpath : P<"libpath", "Additional library search path">;
def linkrepro : P<"linkrepro", "Dump linker invocation and input files for debugging">;
def machine : P<"machine", "Specify target platform">;
def merge : P<"merge", "Combine sections">;
def mllvm : P<"mllvm", "Options to pass to LLVM">;
def nodefaultlib : P<"nodefaultlib", "Remove a default library">;
def opt : P<"opt", "Control optimizations">;
def out : P<"out", "Path to file to write output">;
def pdb : P<"pdb", "PDB file path">;
def section : P<"section", "Specify section attributes">;
def stack : P<"stack", "Size of the stack">;
def stub : P<"stub", "Specify DOS stub file">;
def subsystem : P<"subsystem", "Specify subsystem">;
def version : P<"version", "Specify a version number in the PE header">;

// /disallowlib is accepted as another spelling of /nodefaultlib.
def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias<nodefaultlib>;

// /manifest exists both as a plain flag and as a flag taking an argument.
def manifest : F<"manifest">;
def manifest_colon : P<"manifest", "Create manifest file">;
def manifestuac : P<"manifestuac", "User access control">;
def manifestfile : P<"manifestfile", "Manifest file path">;
def manifestdependency : P<"manifestdependency",
                           "Attributes for <dependency> in manifest file">;
def manifestinput : P<"manifestinput", "Specify manifest file">;

// We cannot use class P here because the record name "incl" is different
// from its command line option name. We do this because "include" is
// a reserved keyword in tablegen.
def incl : Joined<["/", "-"], "include:">,
    HelpText<"Force symbol to be added to symbol table as undefined one">;

// "def" is also a keyword.
def deffile : Joined<["/", "-"], "def:">,
    HelpText<"Use module-definition file">;

def debug : F<"debug">, HelpText<"Embed a symbol table in the image">;
def debugtype : P<"debugtype", "Debug Info Options">;
def dll : F<"dll">, HelpText<"Create a DLL">;
def driver : P<"driver", "Generate a Windows NT Kernel Mode Driver">;
def nodefaultlib_all : F<"nodefaultlib">;
def noentry : F<"noentry">;
def profile : F<"profile">;
def swaprun_cd : F<"swaprun:cd">;
def swaprun_net : F<"swaprun:net">;
def verbose : F<"verbose">;

def force : F<"force">,
    HelpText<"Allow undefined symbols when creating executables">;
def force_unresolved : F<"force:unresolved">;

// Boolean options. The help text is attached to the ":no" variant, so it
// describes what happens when the feature is turned off.
defm allowbind: B<"allowbind", "Disable DLL binding">;
defm allowisolation : B<"allowisolation", "Set NO_ISOLATION bit">;
defm dynamicbase : B<"dynamicbase",
                     "Disable address space layout randomization">;
defm fixed : B<"fixed", "Enable base relocations">;
defm highentropyva : B<"highentropyva", "Set HIGH_ENTROPY_VA bit">;
defm largeaddressaware : B<"largeaddressaware", "Disable large addresses">;
defm nxcompat : B<"nxcompat", "Disable data execution prevention">;
defm safeseh : B<"safeseh", "Produce an image with Safe Exception Handler">;
defm tsaware : B<"tsaware", "Create non-Terminal Server aware executable">;

def help : F<"help">;
def help_q : Flag<["/?", "-?"], "">, Alias<help>;

// LLD extensions
def nosymtab : F<"nosymtab">;

// Flags for debugging
def debugpdb : F<"debugpdb">;
def dumppdb : Joined<["/", "-"], "dumppdb">;
def lldmap : F<"lldmap">;
def lldmap_file : Joined<["/", "-"], "lldmap:">;

//==============================================================================
// The flags below do nothing. They are defined only for link.exe compatibility.
//==============================================================================

// Ignored option that takes one argument after ":".
class QF<string name> : Joined<["/", "-", "-?"], name#":">;

// Ignored boolean option that also accepts a ":no" suffix.
multiclass QB<string name> {
  def "" : F<name>;
  def _no : F<name#":no">;
}

def functionpadmin : F<"functionpadmin">;
def ignoreidl : F<"ignoreidl">;
def incremental : F<"incremental">;
def no_incremental : F<"incremental:no">;
def nologo : F<"nologo">;
def throwingnew : F<"throwingnew">;
def editandcontinue : F<"editandcontinue">;
def fastfail : F<"fastfail">;

def delay : QF<"delay">;
def errorreport : QF<"errorreport">;
def idlout : QF<"idlout">;
def ignore : QF<"ignore">;
def maxilksize : QF<"maxilksize">;
def pdbaltpath : QF<"pdbaltpath">;
def tlbid : QF<"tlbid">;
def tlbout : QF<"tlbout">;
def verbose_all : QF<"verbose">;
def guardsym : QF<"guardsym">;

defm wx : QB<"wx">;
+// +//===----------------------------------------------------------------------===// + +#include "PDB.h" +#include "Chunks.h" +#include "Config.h" +#include "Error.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "llvm/DebugInfo/CodeView/CVDebugRecord.h" +#include "llvm/DebugInfo/CodeView/SymbolDumper.h" +#include "llvm/DebugInfo/CodeView/TypeDumper.h" +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStream.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStream.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/TpiStream.h" +#include "llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/ScopedPrinter.h" +#include <memory> + +using namespace lld; +using namespace lld::coff; +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::support; +using namespace llvm::support::endian; + +using llvm::object::coff_section; + +static ExitOnError ExitOnErr; + +// Returns a list of all SectionChunks. +static std::vector<coff_section> getInputSections(SymbolTable *Symtab) { + std::vector<coff_section> V; + for (Chunk *C : Symtab->getChunks()) + if (auto *SC = dyn_cast<SectionChunk>(C)) + V.push_back(*SC->Header); + return V; +} + +static SectionChunk *findByName(std::vector<SectionChunk *> &Sections, + StringRef Name) { + for (SectionChunk *C : Sections) + if (C->getSectionName() == Name) + return C; + return nullptr; +} + +static ArrayRef<uint8_t> getDebugT(ObjectFile *File) { + SectionChunk *Sec = findByName(File->getDebugChunks(), ".debug$T"); + if (!Sec) + return {}; + + // First 4 bytes are section magic. 
+ ArrayRef<uint8_t> Data = Sec->getContents(); + if (Data.size() < 4) + fatal(".debug$T too short"); + if (read32le(Data.data()) != COFF::DEBUG_SECTION_MAGIC) + fatal(".debug$T has an invalid magic"); + return Data.slice(4); +} + +static void dumpDebugT(ScopedPrinter &W, ObjectFile *File) { + ArrayRef<uint8_t> Data = getDebugT(File); + if (Data.empty()) + return; + + msf::ByteStream Stream(Data); + CVTypeDumper TypeDumper(&W, false); + if (auto EC = TypeDumper.dump(Data)) + fatal(EC, "CVTypeDumper::dump failed"); +} + +static void dumpDebugS(ScopedPrinter &W, ObjectFile *File) { + SectionChunk *Sec = findByName(File->getDebugChunks(), ".debug$S"); + if (!Sec) + return; + + msf::ByteStream Stream(Sec->getContents()); + CVSymbolArray Symbols; + msf::StreamReader Reader(Stream); + if (auto EC = Reader.readArray(Symbols, Reader.getLength())) + fatal(EC, "StreamReader.readArray<CVSymbolArray> failed"); + + CVTypeDumper TypeDumper(&W, false); + CVSymbolDumper SymbolDumper(W, TypeDumper, nullptr, false); + if (auto EC = SymbolDumper.dump(Symbols)) + fatal(EC, "CVSymbolDumper::dump failed"); +} + +// Dump CodeView debug info. This is for debugging. +static void dumpCodeView(SymbolTable *Symtab) { + ScopedPrinter W(outs()); + + for (ObjectFile *File : Symtab->ObjectFiles) { + dumpDebugT(W, File); + dumpDebugS(W, File); + } +} + +static void addTypeInfo(SymbolTable *Symtab, + pdb::TpiStreamBuilder &TpiBuilder) { + for (ObjectFile *File : Symtab->ObjectFiles) { + ArrayRef<uint8_t> Data = getDebugT(File); + if (Data.empty()) + continue; + + msf::ByteStream Stream(Data); + codeview::CVTypeArray Records; + msf::StreamReader Reader(Stream); + if (auto EC = Reader.readArray(Records, Reader.getLength())) + fatal(EC, "Reader.readArray failed"); + for (const codeview::CVType &Rec : Records) + TpiBuilder.addTypeRecord(Rec); + } +} + +// Creates a PDB file. 
+void coff::createPDB(StringRef Path, SymbolTable *Symtab, + ArrayRef<uint8_t> SectionTable, + const llvm::codeview::DebugInfo *DI) { + if (Config->DumpPdb) + dumpCodeView(Symtab); + + BumpPtrAllocator Alloc; + pdb::PDBFileBuilder Builder(Alloc); + ExitOnErr(Builder.initialize(4096)); // 4096 is blocksize + + // Create streams in MSF for predefined streams, namely + // PDB, TPI, DBI and IPI. + for (int I = 0; I < (int)pdb::kSpecialStreamCount; ++I) + ExitOnErr(Builder.getMsfBuilder().addStream(0)); + + // Add an Info stream. + auto &InfoBuilder = Builder.getInfoBuilder(); + InfoBuilder.setAge(DI->PDB70.Age); + InfoBuilder.setGuid( + *reinterpret_cast<const pdb::PDB_UniqueId *>(&DI->PDB70.Signature)); + // Should be the current time, but set 0 for reproducibilty. + InfoBuilder.setSignature(0); + InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70); + + // Add an empty DPI stream. + auto &DbiBuilder = Builder.getDbiBuilder(); + DbiBuilder.setVersionHeader(pdb::PdbDbiV110); + + // Add an empty TPI stream. + auto &TpiBuilder = Builder.getTpiBuilder(); + TpiBuilder.setVersionHeader(pdb::PdbTpiV80); + if (Config->DebugPdb) + addTypeInfo(Symtab, TpiBuilder); + + // Add an empty IPI stream. + auto &IpiBuilder = Builder.getIpiBuilder(); + IpiBuilder.setVersionHeader(pdb::PdbTpiV80); + + // Add Section Contributions. + std::vector<pdb::SectionContrib> Contribs = + pdb::DbiStreamBuilder::createSectionContribs(getInputSections(Symtab)); + DbiBuilder.setSectionContribs(Contribs); + + // Add Section Map stream. + ArrayRef<object::coff_section> Sections = { + (const object::coff_section *)SectionTable.data(), + SectionTable.size() / sizeof(object::coff_section)}; + std::vector<pdb::SecMapEntry> SectionMap = + pdb::DbiStreamBuilder::createSectionMap(Sections); + DbiBuilder.setSectionMap(SectionMap); + + ExitOnErr(DbiBuilder.addModuleInfo("", "* Linker *")); + + // Add COFF section header stream. 
+ ExitOnErr( + DbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, SectionTable)); + + // Write to a file. + ExitOnErr(Builder.commit(Path)); +} diff --git a/contrib/llvm/tools/lld/COFF/PDB.h b/contrib/llvm/tools/lld/COFF/PDB.h new file mode 100644 index 000000000000..c9c37914299a --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/PDB.h @@ -0,0 +1,32 @@ +//===- PDB.h ----------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_PDB_H +#define LLD_COFF_PDB_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { +namespace codeview { +union DebugInfo; +} +} + +namespace lld { +namespace coff { +class SymbolTable; + +void createPDB(llvm::StringRef Path, SymbolTable *Symtab, + llvm::ArrayRef<uint8_t> SectionTable, + const llvm::codeview::DebugInfo *DI); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/COFF/README.md b/contrib/llvm/tools/lld/COFF/README.md new file mode 100644 index 000000000000..f1bfc9c15263 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/README.md @@ -0,0 +1 @@ +See docs/NewLLD.rst diff --git a/contrib/llvm/tools/lld/COFF/Strings.cpp b/contrib/llvm/tools/lld/COFF/Strings.cpp new file mode 100644 index 000000000000..d0558413f673 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Strings.cpp @@ -0,0 +1,30 @@ +//===- Strings.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Strings.h" + +#if defined(_MSC_VER) +#include <Windows.h> +#include <DbgHelp.h> +#pragma comment(lib, "dbghelp.lib") +#endif + +using namespace lld; +using namespace lld::coff; +using namespace llvm; + +Optional<std::string> coff::demangle(StringRef S) { +#if defined(_MSC_VER) + char Buf[4096]; + if (S.startswith("?")) + if (size_t Len = UnDecorateSymbolName(S.str().c_str(), Buf, sizeof(Buf), 0)) + return std::string(Buf, Len); +#endif + return None; +} diff --git a/contrib/llvm/tools/lld/COFF/Strings.h b/contrib/llvm/tools/lld/COFF/Strings.h new file mode 100644 index 000000000000..1f85f3e2da5c --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Strings.h @@ -0,0 +1,23 @@ +//===- Strings.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_STRINGS_H +#define LLD_COFF_STRINGS_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include <string> + +namespace lld { +namespace coff { +llvm::Optional<std::string> demangle(llvm::StringRef S); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/COFF/SymbolTable.cpp b/contrib/llvm/tools/lld/COFF/SymbolTable.cpp new file mode 100644 index 000000000000..9cc0b75c1510 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/SymbolTable.cpp @@ -0,0 +1,420 @@ +//===- SymbolTable.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
// Parses File, records it in the list matching its kind, and processes any
// linker directives embedded in it. The first file with a known machine type
// fixes Config->Machine; a later file with a different known machine type is
// a fatal error.
void SymbolTable::addFile(InputFile *File) {
  if (Config->Verbose)
    outs() << "Reading " << toString(File) << "\n";
  File->parse();

  MachineTypes MT = File->getMachineType();
  if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
    Config->Machine = MT;
  } else if (MT != IMAGE_FILE_MACHINE_UNKNOWN && Config->Machine != MT) {
    fatal(toString(File) + ": machine type " + machineToStr(MT) +
          " conflicts with " + machineToStr(Config->Machine));
  }

  // Files that are none of the three kinds below are not recorded here.
  if (auto *F = dyn_cast<ObjectFile>(File)) {
    ObjectFiles.push_back(F);
  } else if (auto *F = dyn_cast<BitcodeFile>(File)) {
    BitcodeFiles.push_back(F);
  } else if (auto *F = dyn_cast<ImportFile>(File)) {
    ImportFiles.push_back(F);
  }

  StringRef S = File->getDirectives();
  if (S.empty())
    return;

  if (Config->Verbose)
    outs() << "Directives: " << toString(File) << ": " << S << "\n";
  Driver->parseDirectives(S);
}

// Makes a last attempt to resolve symbols that are still undefined and used
// by a regular object (weak aliases, then the "__imp_" rule), and prints one
// line to stderr for each symbol that stays unresolved. Fails the link unless
// /force is given.
void SymbolTable::reportRemainingUndefines() {
  SmallPtrSet<SymbolBody *, 8> Undefs;
  for (auto &I : Symtab) {
    Symbol *Sym = I.second;
    auto *Undef = dyn_cast<Undefined>(Sym->body());
    if (!Undef)
      continue;
    if (!Sym->IsUsedInRegularObj)
      continue;
    StringRef Name = Undef->getName();
    // A weak alias may have been resolved, so check for that.
    if (Defined *D = Undef->getWeakAlias()) {
      // We resolve weak aliases by replacing the alias's SymbolBody with the
      // target's SymbolBody. This causes all SymbolBody pointers referring to
      // the old symbol to instead refer to the new symbol. However, we can't
      // just blindly copy sizeof(Symbol::Body) bytes from D to Sym->Body
      // because D may be an internal symbol, and internal symbols are stored as
      // "unparented" SymbolBodies. For that reason we need to check which type
      // of symbol we are dealing with and copy the correct number of bytes.
      if (isa<DefinedRegular>(D))
        memcpy(Sym->Body.buffer, D, sizeof(DefinedRegular));
      else if (isa<DefinedAbsolute>(D))
        memcpy(Sym->Body.buffer, D, sizeof(DefinedAbsolute));
      else
        // No other internal symbols are possible.
        Sym->Body = D->symbol()->Body;
      continue;
    }
    // If we can resolve a symbol by removing __imp_ prefix, do that.
    // This odd rule is for compatibility with MSVC linker.
    if (Name.startswith("__imp_")) {
      Symbol *Imp = find(Name.substr(strlen("__imp_")));
      if (Imp && isa<Defined>(Imp->body())) {
        auto *D = cast<Defined>(Imp->body());
        replaceBody<DefinedLocalImport>(Sym, Name, D);
        LocalImportChunks.push_back(
            cast<DefinedLocalImport>(Sym->body())->getChunk());
        continue;
      }
    }
    // Remaining undefined symbols are not fatal if /force is specified.
    // They are replaced with dummy defined symbols.
    if (Config->Force)
      replaceBody<DefinedAbsolute>(Sym, Name, 0);
    // With /force the recorded body is the replacement, so the symbol is
    // still reported below.
    Undefs.insert(Sym->body());
  }
  if (Undefs.empty())
    return;
  // Report each unresolved symbol once per place that refers to it: first
  // the GC roots, then every object file.
  for (SymbolBody *B : Config->GCRoot)
    if (Undefs.count(B))
      errs() << "<root>: undefined symbol: " << B->getName() << "\n";
  for (ObjectFile *File : ObjectFiles)
    for (SymbolBody *Sym : File->getSymbols())
      if (Undefs.count(Sym))
        errs() << toString(File) << ": undefined symbol: " << Sym->getName()
               << "\n";
  if (!Config->Force)
    fatal("link failed");
}

// Returns the Symbol for Name, creating it if it does not exist yet. The
// bool is true if a new Symbol was created; a new Symbol has both flags
// cleared and its body is left for the caller to set.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) {
  Symbol *&Sym = Symtab[CachedHashStringRef(Name)];
  if (Sym)
    return {Sym, false};
  Sym = make<Symbol>();
  Sym->IsUsedInRegularObj = false;
  Sym->PendingArchiveLoad = false;
  return {Sym, true};
}

// Records a reference to Name from file F (F may be null). If the symbol is
// currently Lazy, this requests its archive member at most once, unless the
// reference is a weak alias, in which case the Lazy body is replaced with an
// Undefined one.
Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F,
                                  bool IsWeakAlias) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name);
  // References from anything but a bitcode file count as a regular use.
  if (!F || !isa<BitcodeFile>(F))
    S->IsUsedInRegularObj = true;
  if (WasInserted || (isa<Lazy>(S->body()) && IsWeakAlias)) {
    replaceBody<Undefined>(S, Name);
    return S;
  }
  if (auto *L = dyn_cast<Lazy>(S->body())) {
    if (!S->PendingArchiveLoad) {
      S->PendingArchiveLoad = true;
      L->File->addMember(&L->Sym);
    }
  }
  return S;
}

// Records that archive F can provide Sym. If the name is already Undefined
// (with no weak alias and no load pending), the archive member is requested
// right away; any other existing symbol is left untouched.
void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) {
  StringRef Name = Sym.getName();
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name);
  if (WasInserted) {
    replaceBody<Lazy>(S, F, Sym);
    return;
  }
  auto *U = dyn_cast<Undefined>(S->body());
  if (!U || U->WeakAlias || S->PendingArchiveLoad)
    return;
  S->PendingArchiveLoad = true;
  F->addMember(&Sym);
}

// Reports a duplicate definition as a fatal error. A null NewFile is printed
// as "(internal)".
void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) {
  fatal("duplicate symbol: " + toString(*Existing->body()) + " in " +
        toString(Existing->body()->getFile()) + " and in " +
        (NewFile ? toString(NewFile) : "(internal)"));
}

// Defines N as an absolute symbol described by the COFF symbol Sym. A new,
// Undefined or Lazy symbol is replaced; an existing DefinedCOFF body is kept
// silently; anything else is reported as a duplicate.
Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(N);
  S->IsUsedInRegularObj = true;
  if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
    replaceBody<DefinedAbsolute>(S, N, Sym);
  else if (!isa<DefinedCOFF>(S->body()))
    reportDuplicate(S, nullptr);
  return S;
}

// Same as above, but the absolute symbol is created from the value VA.
Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(N);
  S->IsUsedInRegularObj = true;
  if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
    replaceBody<DefinedAbsolute>(S, N, VA);
  else if (!isa<DefinedCOFF>(S->body()))
    reportDuplicate(S, nullptr);
  return S;
}

// Defines N as a DefinedRelative symbol with value VA, using the same
// conflict rules as addAbsolute().
Symbol *SymbolTable::addRelative(StringRef N, uint64_t VA) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(N);
  S->IsUsedInRegularObj = true;
  if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
    replaceBody<DefinedRegular>(S, N, VA);
  else if (!isa<DefinedCOFF>(S->body()))
    reportDuplicate(S, nullptr);
  return S;
}

// Adds the regular symbol Sym, defined in chunk C of object file F.
// Two regular definitions conflict unless both are COMDAT, in which case the
// existing one is kept. A replaceable bitcode definition is overridden; a
// non-replaceable one conflicts unless C is COMDAT. Any other existing body
// is overridden.
Symbol *SymbolTable::addRegular(ObjectFile *F, COFFSymbolRef Sym,
                                SectionChunk *C) {
  StringRef Name;
  // NOTE(review): the result of getSymbolName() is ignored here — confirm
  // that it cannot fail for symbols reaching this point.
  F->getCOFFObj()->getSymbolName(Sym, Name);
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name);
  S->IsUsedInRegularObj = true;
  if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
    replaceBody<DefinedRegular>(S, F, Sym, C);
  else if (auto *R = dyn_cast<DefinedRegular>(S->body())) {
    if (!C->isCOMDAT() || !R->isCOMDAT())
      reportDuplicate(S, F);
  } else if (auto *B = dyn_cast<DefinedBitcode>(S->body())) {
    if (B->IsReplaceable)
      replaceBody<DefinedRegular>(S, F, Sym, C);
    else if (!C->isCOMDAT())
      reportDuplicate(S, F);
  } else
    replaceBody<DefinedRegular>(S, F, Sym, C);
  return S;
}

// Adds the symbol N defined in bitcode file F. An existing common symbol
// always wins; a replaceable definition also yields to an existing regular
// or bitcode definition. Every other conflict is a duplicate.
Symbol *SymbolTable::addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(N);
  if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) {
    replaceBody<DefinedBitcode>(S, F, N, IsReplaceable);
    return S;
  }
  if (isa<DefinedCommon>(S->body()))
    return S;
  if (IsReplaceable)
    if (isa<DefinedRegular>(S->body()) || isa<DefinedBitcode>(S->body()))
      return S;
  reportDuplicate(S, F);
  return S;
}

// Adds the common symbol Sym backed by chunk C. It replaces any body that is
// not a DefinedCOFF, and replaces an existing common symbol only if Sym's
// value (its size) is larger.
Symbol *SymbolTable::addCommon(ObjectFile *F, COFFSymbolRef Sym,
                               CommonChunk *C) {
  StringRef Name;
  // NOTE(review): the result of getSymbolName() is ignored here as well.
  F->getCOFFObj()->getSymbolName(Sym, Name);
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name);
  S->IsUsedInRegularObj = true;
  if (WasInserted || !isa<DefinedCOFF>(S->body()))
    replaceBody<DefinedCommon>(S, F, Sym, C);
  else if (auto *DC = dyn_cast<DefinedCommon>(S->body()))
    if (Sym.getValue() > DC->getSize())
      replaceBody<DefinedCommon>(S, F, Sym, C);
  return S;
}

// Defines N as import data from import file F, using the same conflict
// rules as addAbsolute().
Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(N);
  S->IsUsedInRegularObj = true;
  if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
    replaceBody<DefinedImportData>(S, N, F);
  else if (!isa<DefinedCOFF>(S->body()))
    reportDuplicate(S, nullptr);
  return S;
}

// Defines Name as an import thunk for ID on the given machine, using the
// same conflict rules as addAbsolute().
Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID,
                                    uint16_t Machine) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name);
  S->IsUsedInRegularObj = true;
  if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
    replaceBody<DefinedImportThunk>(S, Name, ID, Machine);
  else if (!isa<DefinedCOFF>(S->body()))
    reportDuplicate(S, nullptr);
  return S;
}

// Returns the chunks of all object files, concatenated in the order of
// ObjectFiles.
std::vector<Chunk *> SymbolTable::getChunks() {
  std::vector<Chunk *> Res;
  for (ObjectFile *File : ObjectFiles) {
    std::vector<Chunk *> &V = File->getChunks();
    Res.insert(Res.end(), V.begin(), V.end());
  }
  return Res;
}

// Returns the Symbol named Name, or nullptr if there is none.
Symbol *SymbolTable::find(StringRef Name) {
  auto It = Symtab.find(CachedHashStringRef(Name));
  if (It == Symtab.end())
    return nullptr;
  return It->second;
}

// Like find(), but on I386 the name is looked up with a leading underscore.
Symbol *SymbolTable::findUnderscore(StringRef Name) {
  if (Config->Machine == I386)
    return find(("_" + Name).str());
  return find(Name);
}

// Returns the name of a symbol starting with Prefix, or "" if there is none.
// Which symbol is returned when several match depends on the iteration order
// of the table.
StringRef SymbolTable::findByPrefix(StringRef Prefix) {
  for (auto Pair : Symtab) {
    StringRef Name = Pair.first.val();
    if (Name.startswith(Prefix))
      return Name;
  }
  return "";
}

// Returns the name under which Name is actually present in the table: Name
// itself if it exists and is not Undefined, otherwise a matching decorated
// name found by prefix search, or "" if nothing matches.
StringRef SymbolTable::findMangle(StringRef Name) {
  if (Symbol *Sym = find(Name))
    if (!isa<Undefined>(Sym->body()))
      return Name;
  if (Config->Machine != I386)
    return findByPrefix(("?" + Name + "@@Y").str());
  if (!Name.startswith("_"))
    return "";
  // Search for x86 C function.
  StringRef S = findByPrefix((Name + "@").str());
  if (!S.empty())
    return S;
  // Search for x86 C++ non-member function.
  return findByPrefix(("?" + Name.substr(1) + "@@Y").str());
}

// If B is Undefined and has no weak alias yet, makes it a weak alias of the
// decorated name found by findMangle(), if there is one.
void SymbolTable::mangleMaybe(SymbolBody *B) {
  auto *U = dyn_cast<Undefined>(B);
  if (!U || U->WeakAlias)
    return;
  StringRef Alias = findMangle(U->getName());
  if (!Alias.empty())
    U->WeakAlias = addUndefined(Alias);
}

// Convenience overload: adds an undefined reference that comes from no file
// and is not a weak alias, and returns the symbol's body.
SymbolBody *SymbolTable::addUndefined(StringRef Name) {
  return addUndefined(Name, nullptr, false)->body();
}

// Writes a map of the regular symbols in live chunks to OS, grouped by
// object file, as "<hex address> <name>" lines.
void SymbolTable::printMap(llvm::raw_ostream &OS) {
  for (ObjectFile *File : ObjectFiles) {
    OS << toString(File) << ":\n";
    for (SymbolBody *Body : File->getSymbols())
      if (auto *R = dyn_cast<DefinedRegular>(Body))
        if (R->getChunk()->isLive())
          OS << Twine::utohexstr(Config->ImageBase + R->getRVA())
             << " " << R->getName() << "\n";
  }
}

// Compiles all bitcode files and parses the resulting object files. Does
// nothing if there are no bitcode files.
void SymbolTable::addCombinedLTOObjects() {
  if (BitcodeFiles.empty())
    return;

  // Create an object file and add it to the symbol table by replacing any
  // DefinedBitcode symbols with the definitions in the object file.
  LTOCodeGenerator CG(BitcodeFile::Context);
  CG.setOptLevel(Config->LTOOptLevel);
  for (ObjectFile *Obj : createLTOObjects(&CG))
    Obj->parse();
}
+ + std::vector<ObjectFile *> ObjFiles; + for (SmallString<0> &Obj : Objs) { + auto *ObjFile = make<ObjectFile>(MemoryBufferRef(Obj, "<LTO object>")); + ObjectFiles.push_back(ObjFile); + ObjFiles.push_back(ObjFile); + } + + return ObjFiles; +} + +} // namespace coff +} // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/SymbolTable.h b/contrib/llvm/tools/lld/COFF/SymbolTable.h new file mode 100644 index 000000000000..703821f2e124 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/SymbolTable.h @@ -0,0 +1,129 @@ +//===- SymbolTable.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_SYMBOL_TABLE_H +#define LLD_COFF_SYMBOL_TABLE_H + +#include "InputFiles.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +struct LTOCodeGenerator; +} + +namespace lld { +namespace coff { + +class Chunk; +class CommonChunk; +class Defined; +class DefinedAbsolute; +class DefinedRelative; +class Lazy; +class SectionChunk; +class SymbolBody; +struct Symbol; + +// SymbolTable is a bucket of all known symbols, including defined, +// undefined, or lazy symbols (the last one is symbols in archive +// files whose archive members are not yet loaded). +// +// We put all symbols of all files to a SymbolTable, and the +// SymbolTable selects the "best" symbols if there are name +// conflicts. For example, obviously, a defined symbol is better than +// an undefined symbol. Or, if there's a conflict between a lazy and a +// undefined, it'll read an archive member to read a real definition +// to replace the lazy symbol. 
The logic is implemented in the +// add*() functions, which are called by input files as they are parsed. +// There is one add* function per symbol type. +class SymbolTable { +public: + void addFile(InputFile *File); + + // Try to resolve any undefined symbols and update the symbol table + // accordingly, then print an error message for any remaining undefined + // symbols. + void reportRemainingUndefines(); + + // Returns a list of chunks of selected symbols. + std::vector<Chunk *> getChunks(); + + // Returns a symbol for a given name. Returns a nullptr if not found. + Symbol *find(StringRef Name); + Symbol *findUnderscore(StringRef Name); + + // Occasionally we have to resolve an undefined symbol to its + // mangled symbol. This function tries to find a mangled name + // for U from the symbol table, and if found, set the symbol as + // a weak alias for U. + void mangleMaybe(SymbolBody *B); + StringRef findMangle(StringRef Name); + + // Print a layout map to OS. + void printMap(llvm::raw_ostream &OS); + + // Build a set of COFF objects representing the combined contents of + // BitcodeFiles and add them to the symbol table. Called after all files are + // added and before the writer writes results to a file. + void addCombinedLTOObjects(); + + // The writer needs to handle DLL import libraries specially in + // order to create the import descriptor table. + std::vector<ImportFile *> ImportFiles; + + // The writer needs to infer the machine type from the object files. + std::vector<ObjectFile *> ObjectFiles; + + // Creates an Undefined symbol for a given name. 
+ SymbolBody *addUndefined(StringRef Name); + + Symbol *addRelative(StringRef N, uint64_t VA); + Symbol *addAbsolute(StringRef N, uint64_t VA); + + Symbol *addUndefined(StringRef Name, InputFile *F, bool IsWeakAlias); + void addLazy(ArchiveFile *F, const Archive::Symbol Sym); + Symbol *addAbsolute(StringRef N, COFFSymbolRef S); + Symbol *addRegular(ObjectFile *F, COFFSymbolRef S, SectionChunk *C); + Symbol *addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable); + Symbol *addCommon(ObjectFile *F, COFFSymbolRef S, CommonChunk *C); + Symbol *addImportData(StringRef N, ImportFile *F); + Symbol *addImportThunk(StringRef Name, DefinedImportData *S, + uint16_t Machine); + + void reportDuplicate(Symbol *Existing, InputFile *NewFile); + + // A list of chunks which to be added to .rdata. + std::vector<Chunk *> LocalImportChunks; + +private: + void readArchive(); + void readObjects(); + + std::pair<Symbol *, bool> insert(StringRef Name); + StringRef findByPrefix(StringRef Prefix); + + void addCombinedLTOObject(ObjectFile *Obj); + std::vector<ObjectFile *> createLTOObjects(llvm::LTOCodeGenerator *CG); + + llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> Symtab; + + std::vector<BitcodeFile *> BitcodeFiles; + std::vector<SmallString<0>> Objs; +}; + +extern SymbolTable *Symtab; + +} // namespace coff +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/COFF/Symbols.cpp b/contrib/llvm/tools/lld/COFF/Symbols.cpp new file mode 100644 index 000000000000..c44537d37135 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Symbols.cpp @@ -0,0 +1,84 @@ +//===- Symbols.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Symbols.h" +#include "Error.h" +#include "InputFiles.h" +#include "Memory.h" +#include "Strings.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::object; + +// Returns a symbol name for an error message. +std::string lld::toString(coff::SymbolBody &B) { + if (Optional<std::string> S = coff::demangle(B.getName())) + return ("\"" + *S + "\" (" + B.getName() + ")").str(); + return B.getName(); +} + +namespace lld { +namespace coff { + +StringRef SymbolBody::getName() { + // DefinedCOFF names are read lazily for a performance reason. + // Non-external symbol names are never used by the linker except for logging + // or debugging. Their internal references are resolved not by name but by + // symbol index. And because they are not external, no one can refer them by + // name. Object files contain lots of non-external symbols, and creating + // StringRefs for them (which involves lots of strlen() on the string table) + // is a waste of time. 
+ if (Name.empty()) { + auto *D = cast<DefinedCOFF>(this); + D->File->getCOFFObj()->getSymbolName(D->Sym, Name); + } + return Name; +} + +InputFile *SymbolBody::getFile() { + if (auto *Sym = dyn_cast<DefinedCOFF>(this)) + return Sym->File; + if (auto *Sym = dyn_cast<DefinedBitcode>(this)) + return Sym->File; + if (auto *Sym = dyn_cast<Lazy>(this)) + return Sym->File; + return nullptr; +} + +COFFSymbolRef DefinedCOFF::getCOFFSymbol() { + size_t SymSize = File->getCOFFObj()->getSymbolTableEntrySize(); + if (SymSize == sizeof(coff_symbol16)) + return COFFSymbolRef(reinterpret_cast<const coff_symbol16 *>(Sym)); + assert(SymSize == sizeof(coff_symbol32)); + return COFFSymbolRef(reinterpret_cast<const coff_symbol32 *>(Sym)); +} + +DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S, + uint16_t Machine) + : Defined(DefinedImportThunkKind, Name) { + switch (Machine) { + case AMD64: Data = make<ImportThunkChunkX64>(S); return; + case I386: Data = make<ImportThunkChunkX86>(S); return; + case ARMNT: Data = make<ImportThunkChunkARM>(S); return; + default: llvm_unreachable("unknown machine type"); + } +} + +Defined *Undefined::getWeakAlias() { + // A weak alias may be a weak alias to another symbol, so check recursively. + for (SymbolBody *A = WeakAlias; A; A = cast<Undefined>(A)->WeakAlias) + if (auto *D = dyn_cast<Defined>(A)) + return D; + return nullptr; +} +} // namespace coff +} // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Symbols.h b/contrib/llvm/tools/lld/COFF/Symbols.h new file mode 100644 index 000000000000..1ca7366364d7 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Symbols.h @@ -0,0 +1,436 @@ +//===- Symbols.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//

#ifndef LLD_COFF_SYMBOLS_H
#define LLD_COFF_SYMBOLS_H

#include "Chunks.h"
#include "Config.h"
#include "Memory.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include <atomic>
#include <memory>
#include <vector>

namespace lld {
namespace coff {

using llvm::object::Archive;
using llvm::object::COFFSymbolRef;
using llvm::object::coff_import_header;
using llvm::object::coff_symbol_generic;

class ArchiveFile;
class BitcodeFile;
class InputFile;
class ObjectFile;
struct Symbol;
class SymbolTable;

// The base class for real symbol classes.
class SymbolBody {
public:
  enum Kind {
    // The order of these is significant. We start with the regular defined
    // symbols as those are the most prevalent and the zero tag is the cheapest
    // to set. Among the defined kinds, the lower the kind is preferred over
    // the higher kind when testing whether one symbol should take precedence
    // over another.
    DefinedRegularKind = 0,
    DefinedCommonKind,
    DefinedLocalImportKind,
    DefinedImportThunkKind,
    DefinedImportDataKind,
    DefinedAbsoluteKind,
    DefinedRelativeKind,
    DefinedBitcodeKind,

    UndefinedKind,
    LazyKind,

    // Range markers: the classof() implementations of DefinedCOFF and Defined
    // test "kind() <= Last...Kind", so the enumerator order above is what
    // makes those checks correct.
    LastDefinedCOFFKind = DefinedCommonKind,
    LastDefinedKind = DefinedBitcodeKind,
  };

  // The kind is stored in an 8-bit bitfield; convert it back to the enum.
  Kind kind() const { return static_cast<Kind>(SymbolKind); }

  // Returns true if this is an external symbol.
  bool isExternal() { return IsExternal; }

  // Returns the symbol name.
  StringRef getName();

  // Returns the file from which this symbol was created.
  InputFile *getFile();

  // Returns the Symbol whose Body storage holds this object. The const
  // overload forwards to the non-const one.
  Symbol *symbol();
  const Symbol *symbol() const {
    return const_cast<SymbolBody *>(this)->symbol();
  }

protected:
  friend SymbolTable;
  // Symbols start out external; subclasses overwrite IsExternal, IsCOMDAT and
  // IsReplaceable in their own constructors where they have better data.
  explicit SymbolBody(Kind K, StringRef N = "")
      : SymbolKind(K), IsExternal(true), IsCOMDAT(false),
        IsReplaceable(false), WrittenToSymtab(false), Name(N) {}

  const unsigned SymbolKind : 8;
  unsigned IsExternal : 1;

  // This bit is used by the \c DefinedRegular subclass.
  unsigned IsCOMDAT : 1;

  // This bit is used by the \c DefinedBitcode subclass.
  unsigned IsReplaceable : 1;

public:
  // This bit is used by Writer::createSymbolAndStringTable().
  unsigned WrittenToSymtab : 1;

protected:
  // May be empty for DefinedCOFF symbols until getName() fills it in.
  StringRef Name;
};

// The base class for any defined symbols, including absolute symbols,
// etc.
class Defined : public SymbolBody {
public:
  Defined(Kind K, StringRef N = "") : SymbolBody(K, N) {}

  // Every kind up to and including LastDefinedKind is a Defined.
  static bool classof(const SymbolBody *S) {
    return S->kind() <= LastDefinedKind;
  }

  // Returns the RVA (relative virtual address) of this symbol. The
  // writer sets and uses RVAs.
  uint64_t getRVA();

  // Returns the RVA relative to the beginning of the output section.
  // Used to implement SECREL relocation type.
  uint64_t getSecrel();

  // Returns the output section index.
  // Used to implement SECTION relocation type.
  uint64_t getSectionIndex();

  // Returns true if this symbol points to an executable (e.g. .text) section.
  // Used to implement ARM relocations.
  bool isExecutable();
};

// Symbols defined via a COFF object file.
class DefinedCOFF : public Defined {
  friend SymbolBody;
public:
  // No name is passed to the base class here; SymbolBody::getName() reads it
  // from the object file on first use. Only the generic symbol pointer is
  // kept; getCOFFSymbol() rebuilds a COFFSymbolRef from it.
  DefinedCOFF(Kind K, ObjectFile *F, COFFSymbolRef S)
      : Defined(K), File(F), Sym(S.getGeneric()) {}

  // DefinedRegular and DefinedCommon are the kinds at or below
  // LastDefinedCOFFKind.
  static bool classof(const SymbolBody *S) {
    return S->kind() <= LastDefinedCOFFKind;
  }

  // Hides SymbolBody::getFile() with a version returning the concrete type.
  ObjectFile *getFile() { return File; }

  COFFSymbolRef getCOFFSymbol();

  ObjectFile *File;

protected:
  const coff_symbol_generic *Sym;
};

// Regular defined symbols read from object file symbol tables.
class DefinedRegular : public DefinedCOFF {
public:
  // Data points at the chunk's Repl field rather than at the chunk itself, so
  // getChunk()/getRVA() follow whatever chunk Repl designates at call time.
  // NOTE(review): presumably Repl is redirected when sections are merged or
  // replaced -- confirm in Chunks.h.
  DefinedRegular(ObjectFile *F, COFFSymbolRef S, SectionChunk *C)
      : DefinedCOFF(DefinedRegularKind, F, S), Data(&C->Repl) {
    IsExternal = S.isExternal();
    IsCOMDAT = C->isCOMDAT();
  }

  static bool classof(const SymbolBody *S) {
    return S->kind() == DefinedRegularKind;
  }

  // Chunk RVA plus the symbol's value from the COFF symbol table entry.
  uint64_t getRVA() { return (*Data)->getRVA() + Sym->Value; }
  bool isCOMDAT() { return IsCOMDAT; }
  SectionChunk *getChunk() { return *Data; }
  uint32_t getValue() { return Sym->Value; }

private:
  SectionChunk **Data;
};

// Common symbols read from object files. The symbol's RVA is that of its
// CommonChunk.
class DefinedCommon : public DefinedCOFF {
public:
  DefinedCommon(ObjectFile *F, COFFSymbolRef S, CommonChunk *C)
      : DefinedCOFF(DefinedCommonKind, F, S), Data(C) {
    IsExternal = S.isExternal();
  }

  static bool classof(const SymbolBody *S) {
    return S->kind() == DefinedCommonKind;
  }

  uint64_t getRVA() { return Data->getRVA(); }

private:
  friend SymbolTable;
  // For a common symbol the Value field is read as its size;
  // SymbolTable::addCommon() compares sizes to keep the larger definition.
  uint64_t getSize() { return Sym->Value; }
  CommonChunk *Data;
};

// Absolute symbols.
class DefinedAbsolute : public Defined {
public:
  // Builds an absolute symbol from an object file's symbol table entry,
  // taking its value and external flag from that entry.
  DefinedAbsolute(StringRef N, COFFSymbolRef S)
      : Defined(DefinedAbsoluteKind, N), VA(S.getValue()) {
    IsExternal = S.isExternal();
  }

  // Builds an absolute symbol from an explicit address. IsExternal keeps the
  // base-class default (true).
  DefinedAbsolute(StringRef N, uint64_t V)
      : Defined(DefinedAbsoluteKind, N), VA(V) {}

  static bool classof(const SymbolBody *S) {
    return S->kind() == DefinedAbsoluteKind;
  }

  // The stored value is a virtual address, so the RVA is obtained by
  // subtracting the configured image base.
  uint64_t getRVA() { return VA - Config->ImageBase; }
  void setVA(uint64_t V) { VA = V; }

private:
  uint64_t VA;
};

// This is a kind of absolute symbol but relative to the image base.
// Unlike absolute symbols, relocations referring this kind of symbols
// are subject of the base relocation. This type is used rarely --
// mainly for __ImageBase.
class DefinedRelative : public Defined {
public:
  explicit DefinedRelative(StringRef Name, uint64_t V = 0)
      : Defined(DefinedRelativeKind, Name), RVA(V) {}

  static bool classof(const SymbolBody *S) {
    return S->kind() == DefinedRelativeKind;
  }

  // The value is already image-base relative, so it is returned unchanged.
  uint64_t getRVA() { return RVA; }
  void setRVA(uint64_t V) { RVA = V; }

private:
  uint64_t RVA;
};

// This class represents a symbol defined in an archive file. It is
// created from an archive file header, and it knows how to load an
// object file from an archive to replace itself with a defined
// symbol. If the resolver finds both Undefined and Lazy for
// the same name, it will ask the Lazy to load a file.
class Lazy : public SymbolBody {
public:
  Lazy(ArchiveFile *F, const Archive::Symbol S)
      : SymbolBody(LazyKind, S.getName()), File(F), Sym(S) {}

  static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; }

  ArchiveFile *File;

private:
  // SymbolTable is a friend so that it can reach the private archive symbol
  // below.
  friend SymbolTable;

private:
  const Archive::Symbol Sym;
};

// Undefined symbols.
class Undefined : public SymbolBody {
public:
  explicit Undefined(StringRef N) : SymbolBody(UndefinedKind, N) {}

  static bool classof(const SymbolBody *S) {
    return S->kind() == UndefinedKind;
  }

  // An undefined symbol can have a fallback symbol which gives an
  // undefined symbol a second chance if it would remain undefined.
  // If it remains undefined, it'll be replaced with whatever the
  // Alias pointer points to.
  SymbolBody *WeakAlias = nullptr;

  // If this symbol is external weak, try to resolve it to a defined
  // symbol by searching the chain of fallback symbols. Returns the symbol if
  // successful, otherwise returns null.
  Defined *getWeakAlias();
};

// Windows-specific classes.

// This class represents a symbol imported from a DLL. This has two
// names for internal use and external use. The former is used for
// name resolution, and the latter is used for the import descriptor
// table in an output. The former has "__imp_" prefix.
class DefinedImportData : public Defined {
public:
  DefinedImportData(StringRef N, ImportFile *F)
      : Defined(DefinedImportDataKind, N), File(F) {
  }

  static bool classof(const SymbolBody *S) {
    return S->kind() == DefinedImportDataKind;
  }

  // All accessors below forward to the ImportFile this symbol was created
  // from; the symbol itself stores nothing but the file pointer.
  uint64_t getRVA() { return File->Location->getRVA(); }
  StringRef getDLLName() { return File->DLLName; }
  StringRef getExternalName() { return File->ExternalName; }
  // Stores the chunk on the ImportFile; getRVA() reads it back from there.
  void setLocation(Chunk *AddressTable) { File->Location = AddressTable; }
  uint16_t getOrdinal() { return File->Hdr->OrdinalHint; }

private:
  ImportFile *File;
};

// This class represents a symbol for a jump table entry which jumps
// to a function in a DLL. Linkers are supposed to create such symbols
// without "__imp_" prefix for all function symbols exported from
// DLLs, so that you can call DLL functions as regular functions with
// a regular name. A function pointer is given as a DefinedImportData.
+class DefinedImportThunk : public Defined { +public: + DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine); + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedImportThunkKind; + } + + uint64_t getRVA() { return Data->getRVA(); } + Chunk *getChunk() { return Data; } + +private: + Chunk *Data; +}; + +// If you have a symbol "__imp_foo" in your object file, a symbol name +// "foo" becomes automatically available as a pointer to "__imp_foo". +// This class is for such automatically-created symbols. +// Yes, this is an odd feature. We didn't intend to implement that. +// This is here just for compatibility with MSVC. +class DefinedLocalImport : public Defined { +public: + DefinedLocalImport(StringRef N, Defined *S) + : Defined(DefinedLocalImportKind, N), Data(make<LocalImportChunk>(S)) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedLocalImportKind; + } + + uint64_t getRVA() { return Data->getRVA(); } + Chunk *getChunk() { return Data; } + +private: + LocalImportChunk *Data; +}; + +class DefinedBitcode : public Defined { + friend SymbolBody; +public: + DefinedBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) + : Defined(DefinedBitcodeKind, N), File(F) { + // IsReplaceable tracks whether the bitcode symbol may be replaced with some + // other (defined, common or bitcode) symbol. This is the case for common, + // comdat and weak external symbols. We try to replace bitcode symbols with + // "real" symbols (see SymbolTable::add{Regular,Bitcode}), and resolve the + // result against the real symbol from the combined LTO object. 
+ this->IsReplaceable = IsReplaceable; + } + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedBitcodeKind; + } + + BitcodeFile *File; +}; + +inline uint64_t Defined::getRVA() { + switch (kind()) { + case DefinedAbsoluteKind: + return cast<DefinedAbsolute>(this)->getRVA(); + case DefinedRelativeKind: + return cast<DefinedRelative>(this)->getRVA(); + case DefinedImportDataKind: + return cast<DefinedImportData>(this)->getRVA(); + case DefinedImportThunkKind: + return cast<DefinedImportThunk>(this)->getRVA(); + case DefinedLocalImportKind: + return cast<DefinedLocalImport>(this)->getRVA(); + case DefinedCommonKind: + return cast<DefinedCommon>(this)->getRVA(); + case DefinedRegularKind: + return cast<DefinedRegular>(this)->getRVA(); + case DefinedBitcodeKind: + llvm_unreachable("There is no address for a bitcode symbol."); + case LazyKind: + case UndefinedKind: + llvm_unreachable("Cannot get the address for an undefined symbol."); + } + llvm_unreachable("unknown symbol kind"); +} + +// A real symbol object, SymbolBody, is usually stored within a Symbol. There's +// always one Symbol for each symbol name. The resolver updates the SymbolBody +// stored in the Body field of this object as it resolves symbols. Symbol also +// holds computed properties of symbol names. +struct Symbol { + // True if this symbol was referenced by a regular (non-bitcode) object. + unsigned IsUsedInRegularObj : 1; + + // True if we've seen both a lazy and an undefined symbol with this symbol + // name, which means that we have enqueued an archive member load and should + // not load any more archive members to resolve the same symbol. + unsigned PendingArchiveLoad : 1; + + // This field is used to store the Symbol's SymbolBody. This instantiation of + // AlignedCharArrayUnion gives us a struct with a char array field that is + // large and aligned enough to store any derived class of SymbolBody. 
+ llvm::AlignedCharArrayUnion<DefinedRegular, DefinedCommon, DefinedAbsolute, + DefinedRelative, Lazy, Undefined, + DefinedImportData, DefinedImportThunk, + DefinedLocalImport, DefinedBitcode> + Body; + + SymbolBody *body() { + return reinterpret_cast<SymbolBody *>(Body.buffer); + } + const SymbolBody *body() const { return const_cast<Symbol *>(this)->body(); } +}; + +template <typename T, typename... ArgT> +void replaceBody(Symbol *S, ArgT &&... Arg) { + static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); + static_assert(alignof(T) <= alignof(decltype(S->Body)), + "Body not aligned enough"); + assert(static_cast<SymbolBody *>(static_cast<T *>(nullptr)) == nullptr && + "Not a SymbolBody"); + new (S->Body.buffer) T(std::forward<ArgT>(Arg)...); +} + +inline Symbol *SymbolBody::symbol() { + assert(isExternal()); + return reinterpret_cast<Symbol *>(reinterpret_cast<char *>(this) - + offsetof(Symbol, Body)); +} +} // namespace coff + +std::string toString(coff::SymbolBody &B); +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/COFF/Writer.cpp b/contrib/llvm/tools/lld/COFF/Writer.cpp new file mode 100644 index 000000000000..71217ebeb60a --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Writer.cpp @@ -0,0 +1,912 @@ +//===- Writer.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Writer.h" +#include "Config.h" +#include "DLL.h" +#include "Error.h" +#include "InputFiles.h" +#include "Memory.h" +#include "PDB.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "lld/Core/Parallel.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdio> +#include <map> +#include <memory> +#include <utility> + +using namespace llvm; +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace lld; +using namespace lld::coff; + +static const int PageSize = 4096; +static const int SectorSize = 512; +static const int DOSStubSize = 64; +static const int NumberfOfDataDirectory = 16; + +namespace { + +class DebugDirectoryChunk : public Chunk { +public: + DebugDirectoryChunk(const std::vector<std::unique_ptr<Chunk>> &R) + : Records(R) {} + + size_t getSize() const override { + return Records.size() * sizeof(debug_directory); + } + + void writeTo(uint8_t *B) const override { + auto *D = reinterpret_cast<debug_directory *>(B + OutputSectionOff); + + for (const std::unique_ptr<Chunk> &Record : Records) { + D->Characteristics = 0; + D->TimeDateStamp = 0; + D->MajorVersion = 0; + D->MinorVersion = 0; + D->Type = COFF::IMAGE_DEBUG_TYPE_CODEVIEW; + D->SizeOfData = Record->getSize(); + D->AddressOfRawData = Record->getRVA(); + // TODO(compnerd) get the file offset + D->PointerToRawData = 0; + + ++D; + } + } + +private: + const std::vector<std::unique_ptr<Chunk>> &Records; +}; + +class CVDebugRecordChunk : public Chunk { + size_t getSize() const override { + return sizeof(codeview::DebugInfo) + 
Config->PDBPath.size() + 1; + } + + void writeTo(uint8_t *B) const override { + // Save off the DebugInfo entry to backfill the file signature (build id) + // in Writer::writeBuildId + DI = reinterpret_cast<codeview::DebugInfo *>(B + OutputSectionOff); + + DI->Signature.CVSignature = OMF::Signature::PDB70; + + // variable sized field (PDB Path) + auto *P = reinterpret_cast<char *>(B + OutputSectionOff + sizeof(*DI)); + if (!Config->PDBPath.empty()) + memcpy(P, Config->PDBPath.data(), Config->PDBPath.size()); + P[Config->PDBPath.size()] = '\0'; + } + +public: + mutable codeview::DebugInfo *DI = nullptr; +}; + +// The writer writes a SymbolTable result to a file. +class Writer { +public: + Writer(SymbolTable *T) : Symtab(T) {} + void run(); + +private: + void createSections(); + void createMiscChunks(); + void createImportTables(); + void createExportTable(); + void assignAddresses(); + void removeEmptySections(); + void createSymbolAndStringTable(); + void openFile(StringRef OutputPath); + template <typename PEHeaderTy> void writeHeader(); + void fixSafeSEHSymbols(); + void setSectionPermissions(); + void writeSections(); + void sortExceptionTable(); + void writeBuildId(); + void applyRelocations(); + + llvm::Optional<coff_symbol16> createSymbol(Defined *D); + size_t addEntryToStringTable(StringRef Str); + + OutputSection *findSection(StringRef Name); + OutputSection *createSection(StringRef Name); + void addBaserels(OutputSection *Dest); + void addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V); + + uint32_t getSizeOfInitializedData(); + std::map<StringRef, std::vector<DefinedImportData *>> binImports(); + + SymbolTable *Symtab; + std::unique_ptr<FileOutputBuffer> Buffer; + std::vector<OutputSection *> OutputSections; + std::vector<char> Strtab; + std::vector<llvm::object::coff_symbol16> OutputSymtab; + IdataContents Idata; + DelayLoadContents DelayIdata; + EdataContents Edata; + std::unique_ptr<SEHTableChunk> SEHTable; + + std::unique_ptr<Chunk> 
DebugDirectory; + std::vector<std::unique_ptr<Chunk>> DebugRecords; + CVDebugRecordChunk *BuildId = nullptr; + ArrayRef<uint8_t> SectionTable; + + uint64_t FileSize; + uint32_t PointerToSymbolTable = 0; + uint64_t SizeOfImage; + uint64_t SizeOfHeaders; + + std::vector<std::unique_ptr<Chunk>> Chunks; +}; +} // anonymous namespace + +namespace lld { +namespace coff { + +void writeResult(SymbolTable *T) { Writer(T).run(); } + +// OutputSection represents a section in an output file. It's a +// container of chunks. OutputSection and Chunk are 1:N relationship. +// Chunks cannot belong to more than one OutputSections. The writer +// creates multiple OutputSections and assign them unique, +// non-overlapping file offsets and RVAs. +class OutputSection { +public: + OutputSection(StringRef N) : Name(N), Header({}) {} + void setRVA(uint64_t); + void setFileOffset(uint64_t); + void addChunk(Chunk *C); + StringRef getName() { return Name; } + std::vector<Chunk *> &getChunks() { return Chunks; } + void addPermissions(uint32_t C); + void setPermissions(uint32_t C); + uint32_t getPermissions() { return Header.Characteristics & PermMask; } + uint32_t getCharacteristics() { return Header.Characteristics; } + uint64_t getRVA() { return Header.VirtualAddress; } + uint64_t getFileOff() { return Header.PointerToRawData; } + void writeHeaderTo(uint8_t *Buf); + + // Returns the size of this section in an executable memory image. + // This may be smaller than the raw size (the raw size is multiple + // of disk sector size, so there may be padding at end), or may be + // larger (if that's the case, the loader reserves spaces after end + // of raw data). + uint64_t getVirtualSize() { return Header.VirtualSize; } + + // Returns the size of the section in the output file. + uint64_t getRawSize() { return Header.SizeOfRawData; } + + // Set offset into the string table storing this section name. + // Used only when the name is longer than 8 bytes. 
+ void setStringTableOff(uint32_t V) { StringTableOff = V; } + + // N.B. The section index is one based. + uint32_t SectionIndex = 0; + +private: + StringRef Name; + coff_section Header; + uint32_t StringTableOff = 0; + std::vector<Chunk *> Chunks; +}; + +void OutputSection::setRVA(uint64_t RVA) { + Header.VirtualAddress = RVA; + for (Chunk *C : Chunks) + C->setRVA(C->getRVA() + RVA); +} + +void OutputSection::setFileOffset(uint64_t Off) { + // If a section has no actual data (i.e. BSS section), we want to + // set 0 to its PointerToRawData. Otherwise the output is rejected + // by the loader. + if (Header.SizeOfRawData == 0) + return; + Header.PointerToRawData = Off; +} + +void OutputSection::addChunk(Chunk *C) { + Chunks.push_back(C); + C->setOutputSection(this); + uint64_t Off = Header.VirtualSize; + Off = alignTo(Off, C->getAlign()); + C->setRVA(Off); + C->setOutputSectionOff(Off); + Off += C->getSize(); + Header.VirtualSize = Off; + if (C->hasData()) + Header.SizeOfRawData = alignTo(Off, SectorSize); +} + +void OutputSection::addPermissions(uint32_t C) { + Header.Characteristics |= C & PermMask; +} + +void OutputSection::setPermissions(uint32_t C) { + Header.Characteristics = C & PermMask; +} + +// Write the section header to a given buffer. +void OutputSection::writeHeaderTo(uint8_t *Buf) { + auto *Hdr = reinterpret_cast<coff_section *>(Buf); + *Hdr = Header; + if (StringTableOff) { + // If name is too long, write offset into the string table as a name. 
+ sprintf(Hdr->Name, "/%d", StringTableOff); + } else { + assert(!Config->Debug || Name.size() <= COFF::NameSize); + strncpy(Hdr->Name, Name.data(), + std::min(Name.size(), (size_t)COFF::NameSize)); + } +} + +uint64_t Defined::getSecrel() { + if (auto *D = dyn_cast<DefinedRegular>(this)) + return getRVA() - D->getChunk()->getOutputSection()->getRVA(); + fatal("SECREL relocation points to a non-regular symbol"); +} + +uint64_t Defined::getSectionIndex() { + if (auto *D = dyn_cast<DefinedRegular>(this)) + return D->getChunk()->getOutputSection()->SectionIndex; + fatal("SECTION relocation points to a non-regular symbol"); +} + +bool Defined::isExecutable() { + const auto X = IMAGE_SCN_MEM_EXECUTE; + if (auto *D = dyn_cast<DefinedRegular>(this)) + return D->getChunk()->getOutputSection()->getPermissions() & X; + return isa<DefinedImportThunk>(this); +} + +} // namespace coff +} // namespace lld + +// The main function of the writer. +void Writer::run() { + createSections(); + createMiscChunks(); + createImportTables(); + createExportTable(); + if (Config->Relocatable) + createSection(".reloc"); + assignAddresses(); + removeEmptySections(); + setSectionPermissions(); + createSymbolAndStringTable(); + openFile(Config->OutputFile); + if (Config->is64()) { + writeHeader<pe32plus_header>(); + } else { + writeHeader<pe32_header>(); + } + fixSafeSEHSymbols(); + writeSections(); + sortExceptionTable(); + writeBuildId(); + + if (!Config->PDBPath.empty()) + createPDB(Config->PDBPath, Symtab, SectionTable, BuildId->DI); + + if (auto EC = Buffer->commit()) + fatal(EC, "failed to write the output file"); +} + +static StringRef getOutputSection(StringRef Name) { + StringRef S = Name.split('$').first; + auto It = Config->Merge.find(S); + if (It == Config->Merge.end()) + return S; + return It->second; +} + +// Create output section objects and add them to OutputSections. +void Writer::createSections() { + // First, bin chunks by name. 
+ std::map<StringRef, std::vector<Chunk *>> Map; + for (Chunk *C : Symtab->getChunks()) { + auto *SC = dyn_cast<SectionChunk>(C); + if (SC && !SC->isLive()) { + if (Config->Verbose) + SC->printDiscardedMessage(); + continue; + } + Map[C->getSectionName()].push_back(C); + } + + // Then create an OutputSection for each section. + // '$' and all following characters in input section names are + // discarded when determining output section. So, .text$foo + // contributes to .text, for example. See PE/COFF spec 3.2. + SmallDenseMap<StringRef, OutputSection *> Sections; + for (auto Pair : Map) { + StringRef Name = getOutputSection(Pair.first); + OutputSection *&Sec = Sections[Name]; + if (!Sec) { + Sec = make<OutputSection>(Name); + OutputSections.push_back(Sec); + } + std::vector<Chunk *> &Chunks = Pair.second; + for (Chunk *C : Chunks) { + Sec->addChunk(C); + Sec->addPermissions(C->getPermissions()); + } + } +} + +void Writer::createMiscChunks() { + OutputSection *RData = createSection(".rdata"); + + // Create thunks for locally-dllimported symbols. + if (!Symtab->LocalImportChunks.empty()) { + for (Chunk *C : Symtab->LocalImportChunks) + RData->addChunk(C); + } + + // Create Debug Information Chunks + if (Config->Debug) { + DebugDirectory = llvm::make_unique<DebugDirectoryChunk>(DebugRecords); + + // TODO(compnerd) create a coffgrp entry if DebugType::CV is not enabled + if (Config->DebugTypes & static_cast<unsigned>(coff::DebugType::CV)) { + auto Chunk = llvm::make_unique<CVDebugRecordChunk>(); + + BuildId = Chunk.get(); + DebugRecords.push_back(std::move(Chunk)); + } + + RData->addChunk(DebugDirectory.get()); + for (const std::unique_ptr<Chunk> &C : DebugRecords) + RData->addChunk(C.get()); + } + + // Create SEH table. x86-only. 
+ if (Config->Machine != I386) + return; + + std::set<Defined *> Handlers; + + for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) { + if (!File->SEHCompat) + return; + for (SymbolBody *B : File->SEHandlers) + Handlers.insert(cast<Defined>(B)); + } + + SEHTable.reset(new SEHTableChunk(Handlers)); + RData->addChunk(SEHTable.get()); +} + +// Create .idata section for the DLL-imported symbol table. +// The format of this section is inherently Windows-specific. +// IdataContents class abstracted away the details for us, +// so we just let it create chunks and add them to the section. +void Writer::createImportTables() { + if (Symtab->ImportFiles.empty()) + return; + + // Initialize DLLOrder so that import entries are ordered in + // the same order as in the command line. (That affects DLL + // initialization order, and this ordering is MSVC-compatible.) + for (ImportFile *File : Symtab->ImportFiles) { + std::string DLL = StringRef(File->DLLName).lower(); + if (Config->DLLOrder.count(DLL) == 0) + Config->DLLOrder[DLL] = Config->DLLOrder.size(); + } + + OutputSection *Text = createSection(".text"); + for (ImportFile *File : Symtab->ImportFiles) { + if (DefinedImportThunk *Thunk = File->ThunkSym) + Text->addChunk(Thunk->getChunk()); + if (Config->DelayLoads.count(StringRef(File->DLLName).lower())) { + DelayIdata.add(File->ImpSym); + } else { + Idata.add(File->ImpSym); + } + } + if (!Idata.empty()) { + OutputSection *Sec = createSection(".idata"); + for (Chunk *C : Idata.getChunks()) + Sec->addChunk(C); + } + if (!DelayIdata.empty()) { + Defined *Helper = cast<Defined>(Config->DelayLoadHelper); + DelayIdata.create(Helper); + OutputSection *Sec = createSection(".didat"); + for (Chunk *C : DelayIdata.getChunks()) + Sec->addChunk(C); + Sec = createSection(".data"); + for (Chunk *C : DelayIdata.getDataChunks()) + Sec->addChunk(C); + Sec = createSection(".text"); + for (std::unique_ptr<Chunk> &C : DelayIdata.getCodeChunks()) + Sec->addChunk(C.get()); + } +} + +void 
Writer::createExportTable() { + if (Config->Exports.empty()) + return; + OutputSection *Sec = createSection(".edata"); + for (std::unique_ptr<Chunk> &C : Edata.Chunks) + Sec->addChunk(C.get()); +} + +// The Windows loader doesn't seem to like empty sections, +// so we remove them if any. +void Writer::removeEmptySections() { + auto IsEmpty = [](OutputSection *S) { return S->getVirtualSize() == 0; }; + OutputSections.erase( + std::remove_if(OutputSections.begin(), OutputSections.end(), IsEmpty), + OutputSections.end()); + uint32_t Idx = 1; + for (OutputSection *Sec : OutputSections) + Sec->SectionIndex = Idx++; +} + +size_t Writer::addEntryToStringTable(StringRef Str) { + assert(Str.size() > COFF::NameSize); + size_t OffsetOfEntry = Strtab.size() + 4; // +4 for the size field + Strtab.insert(Strtab.end(), Str.begin(), Str.end()); + Strtab.push_back('\0'); + return OffsetOfEntry; +} + +Optional<coff_symbol16> Writer::createSymbol(Defined *Def) { + // Relative symbols are unrepresentable in a COFF symbol table. 
+ if (isa<DefinedRelative>(Def)) + return None; + + if (auto *D = dyn_cast<DefinedRegular>(Def)) + if (!D->getChunk()->isLive()) + return None; + + coff_symbol16 Sym; + StringRef Name = Def->getName(); + if (Name.size() > COFF::NameSize) { + Sym.Name.Offset.Zeroes = 0; + Sym.Name.Offset.Offset = addEntryToStringTable(Name); + } else { + memset(Sym.Name.ShortName, 0, COFF::NameSize); + memcpy(Sym.Name.ShortName, Name.data(), Name.size()); + } + + if (auto *D = dyn_cast<DefinedCOFF>(Def)) { + COFFSymbolRef Ref = D->getCOFFSymbol(); + Sym.Type = Ref.getType(); + Sym.StorageClass = Ref.getStorageClass(); + } else { + Sym.Type = IMAGE_SYM_TYPE_NULL; + Sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL; + } + Sym.NumberOfAuxSymbols = 0; + + switch (Def->kind()) { + case SymbolBody::DefinedAbsoluteKind: + Sym.Value = Def->getRVA(); + Sym.SectionNumber = IMAGE_SYM_ABSOLUTE; + break; + default: { + uint64_t RVA = Def->getRVA(); + OutputSection *Sec = nullptr; + for (OutputSection *S : OutputSections) { + if (S->getRVA() > RVA) + break; + Sec = S; + } + Sym.Value = RVA - Sec->getRVA(); + Sym.SectionNumber = Sec->SectionIndex; + break; + } + } + return Sym; +} + +void Writer::createSymbolAndStringTable() { + if (!Config->Debug || !Config->WriteSymtab) + return; + + // Name field in the section table is 8 byte long. Longer names need + // to be written to the string table. First, construct string table. 
+ for (OutputSection *Sec : OutputSections) { + StringRef Name = Sec->getName(); + if (Name.size() <= COFF::NameSize) + continue; + Sec->setStringTableOff(addEntryToStringTable(Name)); + } + + for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) + for (SymbolBody *B : File->getSymbols()) + if (auto *D = dyn_cast<Defined>(B)) + if (!D->WrittenToSymtab) { + D->WrittenToSymtab = true; + if (Optional<coff_symbol16> Sym = createSymbol(D)) + OutputSymtab.push_back(*Sym); + } + + OutputSection *LastSection = OutputSections.back(); + // We position the symbol table to be adjacent to the end of the last section. + uint64_t FileOff = LastSection->getFileOff() + + alignTo(LastSection->getRawSize(), SectorSize); + if (!OutputSymtab.empty()) { + PointerToSymbolTable = FileOff; + FileOff += OutputSymtab.size() * sizeof(coff_symbol16); + } + if (!Strtab.empty()) + FileOff += Strtab.size() + 4; + FileSize = alignTo(FileOff, SectorSize); +} + +// Visits all sections to assign incremental, non-overlapping RVAs and +// file offsets. +void Writer::assignAddresses() { + SizeOfHeaders = DOSStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + + sizeof(data_directory) * NumberfOfDataDirectory + + sizeof(coff_section) * OutputSections.size(); + SizeOfHeaders += + Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); + SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize); + uint64_t RVA = 0x1000; // The first page is kept unmapped. + FileSize = SizeOfHeaders; + // Move DISCARDABLE (or non-memory-mapped) sections to the end of file because + // the loader cannot handle holes. 
+ std::stable_partition( + OutputSections.begin(), OutputSections.end(), [](OutputSection *S) { + return (S->getPermissions() & IMAGE_SCN_MEM_DISCARDABLE) == 0; + }); + for (OutputSection *Sec : OutputSections) { + if (Sec->getName() == ".reloc") + addBaserels(Sec); + Sec->setRVA(RVA); + Sec->setFileOffset(FileSize); + RVA += alignTo(Sec->getVirtualSize(), PageSize); + FileSize += alignTo(Sec->getRawSize(), SectorSize); + } + SizeOfImage = SizeOfHeaders + alignTo(RVA - 0x1000, PageSize); +} + +template <typename PEHeaderTy> void Writer::writeHeader() { + // Write DOS stub + uint8_t *Buf = Buffer->getBufferStart(); + auto *DOS = reinterpret_cast<dos_header *>(Buf); + Buf += DOSStubSize; + DOS->Magic[0] = 'M'; + DOS->Magic[1] = 'Z'; + DOS->AddressOfRelocationTable = sizeof(dos_header); + DOS->AddressOfNewExeHeader = DOSStubSize; + + // Write PE magic + memcpy(Buf, PEMagic, sizeof(PEMagic)); + Buf += sizeof(PEMagic); + + // Write COFF header + auto *COFF = reinterpret_cast<coff_file_header *>(Buf); + Buf += sizeof(*COFF); + COFF->Machine = Config->Machine; + COFF->NumberOfSections = OutputSections.size(); + COFF->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE; + if (Config->LargeAddressAware) + COFF->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE; + if (!Config->is64()) + COFF->Characteristics |= IMAGE_FILE_32BIT_MACHINE; + if (Config->DLL) + COFF->Characteristics |= IMAGE_FILE_DLL; + if (!Config->Relocatable) + COFF->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED; + COFF->SizeOfOptionalHeader = + sizeof(PEHeaderTy) + sizeof(data_directory) * NumberfOfDataDirectory; + + // Write PE header + auto *PE = reinterpret_cast<PEHeaderTy *>(Buf); + Buf += sizeof(*PE); + PE->Magic = Config->is64() ? 
PE32Header::PE32_PLUS : PE32Header::PE32; + PE->ImageBase = Config->ImageBase; + PE->SectionAlignment = PageSize; + PE->FileAlignment = SectorSize; + PE->MajorImageVersion = Config->MajorImageVersion; + PE->MinorImageVersion = Config->MinorImageVersion; + PE->MajorOperatingSystemVersion = Config->MajorOSVersion; + PE->MinorOperatingSystemVersion = Config->MinorOSVersion; + PE->MajorSubsystemVersion = Config->MajorOSVersion; + PE->MinorSubsystemVersion = Config->MinorOSVersion; + PE->Subsystem = Config->Subsystem; + PE->SizeOfImage = SizeOfImage; + PE->SizeOfHeaders = SizeOfHeaders; + if (!Config->NoEntry) { + Defined *Entry = cast<Defined>(Config->Entry); + PE->AddressOfEntryPoint = Entry->getRVA(); + // Pointer to thumb code must have the LSB set, so adjust it. + if (Config->Machine == ARMNT) + PE->AddressOfEntryPoint |= 1; + } + PE->SizeOfStackReserve = Config->StackReserve; + PE->SizeOfStackCommit = Config->StackCommit; + PE->SizeOfHeapReserve = Config->HeapReserve; + PE->SizeOfHeapCommit = Config->HeapCommit; + if (Config->DynamicBase) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; + if (Config->HighEntropyVA) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA; + if (!Config->AllowBind) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND; + if (Config->NxCompat) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; + if (!Config->AllowIsolation) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; + if (Config->TerminalServerAware) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; + PE->NumberOfRvaAndSize = NumberfOfDataDirectory; + if (OutputSection *Text = findSection(".text")) { + PE->BaseOfCode = Text->getRVA(); + PE->SizeOfCode = Text->getRawSize(); + } + PE->SizeOfInitializedData = getSizeOfInitializedData(); + + // Write data directory + auto *Dir = reinterpret_cast<data_directory *>(Buf); + Buf += sizeof(*Dir) * NumberfOfDataDirectory; + if 
(OutputSection *Sec = findSection(".edata")) { + Dir[EXPORT_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[EXPORT_TABLE].Size = Sec->getVirtualSize(); + } + if (!Idata.empty()) { + Dir[IMPORT_TABLE].RelativeVirtualAddress = Idata.getDirRVA(); + Dir[IMPORT_TABLE].Size = Idata.getDirSize(); + Dir[IAT].RelativeVirtualAddress = Idata.getIATRVA(); + Dir[IAT].Size = Idata.getIATSize(); + } + if (OutputSection *Sec = findSection(".rsrc")) { + Dir[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[RESOURCE_TABLE].Size = Sec->getVirtualSize(); + } + if (OutputSection *Sec = findSection(".pdata")) { + Dir[EXCEPTION_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[EXCEPTION_TABLE].Size = Sec->getVirtualSize(); + } + if (OutputSection *Sec = findSection(".reloc")) { + Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); + } + if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) { + if (Defined *B = dyn_cast<Defined>(Sym->body())) { + Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA(); + Dir[TLS_TABLE].Size = Config->is64() + ? 
sizeof(object::coff_tls_directory64) + : sizeof(object::coff_tls_directory32); + } + } + if (Config->Debug) { + Dir[DEBUG_DIRECTORY].RelativeVirtualAddress = DebugDirectory->getRVA(); + Dir[DEBUG_DIRECTORY].Size = DebugDirectory->getSize(); + } + if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) { + if (auto *B = dyn_cast<DefinedRegular>(Sym->body())) { + SectionChunk *SC = B->getChunk(); + assert(B->getRVA() >= SC->getRVA()); + uint64_t OffsetInChunk = B->getRVA() - SC->getRVA(); + if (!SC->hasData() || OffsetInChunk + 4 > SC->getSize()) + fatal("_load_config_used is malformed"); + + ArrayRef<uint8_t> SecContents = SC->getContents(); + uint32_t LoadConfigSize = + *reinterpret_cast<const ulittle32_t *>(&SecContents[OffsetInChunk]); + if (OffsetInChunk + LoadConfigSize > SC->getSize()) + fatal("_load_config_used is too large"); + Dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = B->getRVA(); + Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize; + } + } + if (!DelayIdata.empty()) { + Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = + DelayIdata.getDirRVA(); + Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize(); + } + + // Write section table + for (OutputSection *Sec : OutputSections) { + Sec->writeHeaderTo(Buf); + Buf += sizeof(coff_section); + } + SectionTable = ArrayRef<uint8_t>( + Buf - OutputSections.size() * sizeof(coff_section), Buf); + + if (OutputSymtab.empty()) + return; + + COFF->PointerToSymbolTable = PointerToSymbolTable; + uint32_t NumberOfSymbols = OutputSymtab.size(); + COFF->NumberOfSymbols = NumberOfSymbols; + auto *SymbolTable = reinterpret_cast<coff_symbol16 *>( + Buffer->getBufferStart() + COFF->PointerToSymbolTable); + for (size_t I = 0; I != NumberOfSymbols; ++I) + SymbolTable[I] = OutputSymtab[I]; + // Create the string table, it follows immediately after the symbol table. + // The first 4 bytes is length including itself. 
+ Buf = reinterpret_cast<uint8_t *>(&SymbolTable[NumberOfSymbols]); + write32le(Buf, Strtab.size() + 4); + if (!Strtab.empty()) + memcpy(Buf + 4, Strtab.data(), Strtab.size()); +} + +void Writer::openFile(StringRef Path) { + Buffer = check( + FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable), + "failed to open " + Path); +} + +void Writer::fixSafeSEHSymbols() { + if (!SEHTable) + return; + if (auto *T = dyn_cast<DefinedRelative>(Config->SEHTable->body())) + T->setRVA(SEHTable->getRVA()); + if (auto *C = dyn_cast<DefinedAbsolute>(Config->SEHCount->body())) + C->setVA(SEHTable->getSize() / 4); +} + +// Handles /section options to allow users to overwrite +// section attributes. +void Writer::setSectionPermissions() { + for (auto &P : Config->Section) { + StringRef Name = P.first; + uint32_t Perm = P.second; + if (auto *Sec = findSection(Name)) + Sec->setPermissions(Perm); + } +} + +// Write section contents to a mmap'ed file. +void Writer::writeSections() { + uint8_t *Buf = Buffer->getBufferStart(); + for (OutputSection *Sec : OutputSections) { + uint8_t *SecBuf = Buf + Sec->getFileOff(); + // Fill gaps between functions in .text with INT3 instructions + // instead of leaving as NUL bytes (which can be interpreted as + // ADD instructions). + if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE) + memset(SecBuf, 0xCC, Sec->getRawSize()); + parallel_for_each(Sec->getChunks().begin(), Sec->getChunks().end(), + [&](Chunk *C) { C->writeTo(SecBuf); }); + } +} + +// Sort .pdata section contents according to PE/COFF spec 5.5. +void Writer::sortExceptionTable() { + OutputSection *Sec = findSection(".pdata"); + if (!Sec) + return; + // We assume .pdata contains function table entries only. 
+ uint8_t *Begin = Buffer->getBufferStart() + Sec->getFileOff(); + uint8_t *End = Begin + Sec->getVirtualSize(); + if (Config->Machine == AMD64) { + struct Entry { ulittle32_t Begin, End, Unwind; }; + parallel_sort( + (Entry *)Begin, (Entry *)End, + [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; }); + return; + } + if (Config->Machine == ARMNT) { + struct Entry { ulittle32_t Begin, Unwind; }; + parallel_sort( + (Entry *)Begin, (Entry *)End, + [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; }); + return; + } + errs() << "warning: don't know how to handle .pdata.\n"; +} + +// Backfill the CVSignature in a PDB70 Debug Record. This backfilling allows us +// to get reproducible builds. +void Writer::writeBuildId() { + // There is nothing to backfill if BuildId was not setup. + if (BuildId == nullptr) + return; + + MD5 Hash; + MD5::MD5Result Res; + + Hash.update(ArrayRef<uint8_t>{Buffer->getBufferStart(), + Buffer->getBufferEnd()}); + Hash.final(Res); + + assert(BuildId->DI->Signature.CVSignature == OMF::Signature::PDB70 && + "only PDB 7.0 is supported"); + assert(sizeof(Res) == sizeof(BuildId->DI->PDB70.Signature) && + "signature size mismatch"); + memcpy(BuildId->DI->PDB70.Signature, Res, + sizeof(codeview::PDB70DebugInfo::Signature)); + // TODO(compnerd) track the Age + BuildId->DI->PDB70.Age = 1; +} + +OutputSection *Writer::findSection(StringRef Name) { + for (OutputSection *Sec : OutputSections) + if (Sec->getName() == Name) + return Sec; + return nullptr; +} + +uint32_t Writer::getSizeOfInitializedData() { + uint32_t Res = 0; + for (OutputSection *S : OutputSections) + if (S->getPermissions() & IMAGE_SCN_CNT_INITIALIZED_DATA) + Res += S->getRawSize(); + return Res; +} + +// Returns an existing section or create a new one if not found. 
+OutputSection *Writer::createSection(StringRef Name) { + if (auto *Sec = findSection(Name)) + return Sec; + const auto DATA = IMAGE_SCN_CNT_INITIALIZED_DATA; + const auto BSS = IMAGE_SCN_CNT_UNINITIALIZED_DATA; + const auto CODE = IMAGE_SCN_CNT_CODE; + const auto DISCARDABLE = IMAGE_SCN_MEM_DISCARDABLE; + const auto R = IMAGE_SCN_MEM_READ; + const auto W = IMAGE_SCN_MEM_WRITE; + const auto X = IMAGE_SCN_MEM_EXECUTE; + uint32_t Perms = StringSwitch<uint32_t>(Name) + .Case(".bss", BSS | R | W) + .Case(".data", DATA | R | W) + .Cases(".didat", ".edata", ".idata", ".rdata", DATA | R) + .Case(".reloc", DATA | DISCARDABLE | R) + .Case(".text", CODE | R | X) + .Default(0); + if (!Perms) + llvm_unreachable("unknown section name"); + auto Sec = make<OutputSection>(Name); + Sec->addPermissions(Perms); + OutputSections.push_back(Sec); + return Sec; +} + +// Dest is .reloc section. Add contents to that section. +void Writer::addBaserels(OutputSection *Dest) { + std::vector<Baserel> V; + for (OutputSection *Sec : OutputSections) { + if (Sec == Dest) + continue; + // Collect all locations for base relocations. + for (Chunk *C : Sec->getChunks()) + C->getBaserels(&V); + // Add the addresses to .reloc section. + if (!V.empty()) + addBaserelBlocks(Dest, V); + V.clear(); + } +} + +// Add addresses to .reloc section. Note that addresses are grouped by page. 
+void Writer::addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V) { + const uint32_t Mask = ~uint32_t(PageSize - 1); + uint32_t Page = V[0].RVA & Mask; + size_t I = 0, J = 1; + for (size_t E = V.size(); J < E; ++J) { + uint32_t P = V[J].RVA & Mask; + if (P == Page) + continue; + Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); + I = J; + Page = P; + } + if (I == J) + return; + Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); +} diff --git a/contrib/llvm/tools/lld/COFF/Writer.h b/contrib/llvm/tools/lld/COFF/Writer.h new file mode 100644 index 000000000000..0d26090177d8 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Writer.h @@ -0,0 +1,24 @@ +//===- Writer.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_WRITER_H +#define LLD_COFF_WRITER_H + +#include <vector> + +namespace lld { +namespace coff { +class SymbolTable; + +void writeResult(SymbolTable *T); + +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/CMakeLists.txt b/contrib/llvm/tools/lld/ELF/CMakeLists.txt new file mode 100644 index 000000000000..2e9d2b941fd9 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/CMakeLists.txt @@ -0,0 +1,61 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(ELFOptionsTableGen) + +if(NOT LLD_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + +add_lld_library(lldELF + Driver.cpp + DriverUtils.cpp + EhFrame.cpp + Error.cpp + GdbIndex.cpp + ICF.cpp + InputFiles.cpp + InputSection.cpp + LTO.cpp + LinkerScript.cpp + MarkLive.cpp + Mips.cpp + OutputSections.cpp + Relocations.cpp + ScriptParser.cpp + Strings.cpp + SymbolTable.cpp + Symbols.cpp + SyntheticSections.cpp + Target.cpp + Thunks.cpp + Writer.cpp + + 
LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Analysis + BitReader + BitWriter + Codegen + Core + DebugInfoDWARF + Demangle + IPO + Linker + LTO + Object + Option + Passes + MC + Support + Target + TransformUtils + + LINK_LIBS + lldConfig + lldCore + ${PTHREAD_LIB} + + DEPENDS + ELFOptionsTableGen + ${tablegen_deps} + ) diff --git a/contrib/llvm/tools/lld/ELF/Config.h b/contrib/llvm/tools/lld/ELF/Config.h new file mode 100644 index 000000000000..b828cdb25047 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Config.h @@ -0,0 +1,167 @@ +//===- Config.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_CONFIG_H +#define LLD_ELF_CONFIG_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/ELF.h" + +#include <vector> + +namespace lld { +namespace elf { + +class InputFile; +struct Symbol; + +enum ELFKind { + ELFNoneKind, + ELF32LEKind, + ELF32BEKind, + ELF64LEKind, + ELF64BEKind +}; + +// For --build-id. +enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid }; + +// For --discard-{all,locals,none} and --retain-symbols-file. +enum class DiscardPolicy { Default, All, Locals, RetainFile, None }; + +// For --strip-{all,debug}. +enum class StripPolicy { None, All, Debug }; + +// For --unresolved-symbols. +enum class UnresolvedPolicy { NoUndef, ReportError, Warn, Ignore }; + +// For --sort-section and linkerscript sorting rules. 
+enum class SortSectionPolicy { Default, None, Alignment, Name, Priority }; + +// For --target2 +enum class Target2Policy { Abs, Rel, GotRel }; + +struct SymbolVersion { + llvm::StringRef Name; + bool IsExternCpp; + bool HasWildcard; +}; + +// This struct contains symbols version definition that +// can be found in version script if it is used for link. +struct VersionDefinition { + VersionDefinition(llvm::StringRef Name, uint16_t Id) : Name(Name), Id(Id) {} + llvm::StringRef Name; + uint16_t Id; + std::vector<SymbolVersion> Globals; + size_t NameOff; // Offset in string table. +}; + +// This struct contains the global configuration for the linker. +// Most fields are direct mapping from the command line options +// and such fields have the same name as the corresponding options. +// Most fields are initialized by the driver. +struct Configuration { + InputFile *FirstElf = nullptr; + uint8_t OSABI = 0; + llvm::StringMap<uint64_t> SectionStartMap; + llvm::StringRef DynamicLinker; + llvm::StringRef Entry; + llvm::StringRef Emulation; + llvm::StringRef Fini; + llvm::StringRef Init; + llvm::StringRef LTOAAPipeline; + llvm::StringRef LTONewPmPasses; + llvm::StringRef OutputFile; + llvm::StringRef SoName; + llvm::StringRef Sysroot; + llvm::StringSet<> RetainSymbolsFile; + std::string RPath; + std::vector<VersionDefinition> VersionDefinitions; + std::vector<llvm::StringRef> AuxiliaryList; + std::vector<llvm::StringRef> SearchPaths; + std::vector<llvm::StringRef> SymbolOrderingFile; + std::vector<llvm::StringRef> Undefined; + std::vector<SymbolVersion> VersionScriptGlobals; + std::vector<SymbolVersion> VersionScriptLocals; + std::vector<uint8_t> BuildIdVector; + bool AllowMultipleDefinition; + bool AsNeeded = false; + bool Bsymbolic; + bool BsymbolicFunctions; + bool ColorDiagnostics = false; + bool Demangle = true; + bool DisableVerify; + bool EhFrameHdr; + bool EnableNewDtags; + bool ExportDynamic; + bool FatalWarnings; + bool GcSections; + bool GdbIndex; + bool GnuHash 
= false; + bool ICF; + bool Mips64EL = false; + bool MipsN32Abi = false; + bool NoGnuUnique; + bool NoUndefinedVersion; + bool Nostdlib; + bool OFormatBinary; + bool OMagic; + bool Pic; + bool Pie; + bool PrintGcSections; + bool Rela; + bool Relocatable; + bool SaveTemps; + bool SingleRoRx; + bool Shared; + bool Static = false; + bool SysvHash = true; + bool Target1Rel; + bool Threads; + bool Trace; + bool Verbose; + bool WarnCommon; + bool WarnMissingEntry; + bool ZCombreloc; + bool ZExecstack; + bool ZNodelete; + bool ZNow; + bool ZOrigin; + bool ZRelro; + bool ExitEarly; + bool ZWxneeded; + DiscardPolicy Discard; + SortSectionPolicy SortSection; + StripPolicy Strip = StripPolicy::None; + UnresolvedPolicy UnresolvedSymbols; + Target2Policy Target2 = Target2Policy::GotRel; + BuildIdKind BuildId = BuildIdKind::None; + ELFKind EKind = ELFNoneKind; + uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL; + uint16_t EMachine = llvm::ELF::EM_NONE; + uint64_t ErrorLimit = 20; + uint64_t ImageBase; + uint64_t MaxPageSize; + uint64_t ZStackSize; + unsigned LTOPartitions; + unsigned LTOO; + unsigned Optimize; + unsigned ThinLTOJobs; +}; + +// The only instance of Configuration struct. +extern Configuration *Config; + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Driver.cpp b/contrib/llvm/tools/lld/ELF/Driver.cpp new file mode 100644 index 000000000000..c8ea821ec522 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Driver.cpp @@ -0,0 +1,826 @@ +//===- Driver.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Config.h" +#include "Error.h" +#include "ICF.h" +#include "InputFiles.h" +#include "InputSection.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "Target.h" +#include "Threads.h" +#include "Writer.h" +#include "lld/Config/Version.h" +#include "lld/Driver/Driver.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/TarWriter.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdlib> +#include <utility> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::sys; + +using namespace lld; +using namespace lld::elf; + +Configuration *elf::Config; +LinkerDriver *elf::Driver; + +BumpPtrAllocator elf::BAlloc; +StringSaver elf::Saver{BAlloc}; +std::vector<SpecificAllocBase *> elf::SpecificAllocBase::Instances; + +bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly, + raw_ostream &Error) { + ErrorCount = 0; + ErrorOS = &Error; + Argv0 = Args[0]; + Tar = nullptr; + + Config = make<Configuration>(); + Driver = make<LinkerDriver>(); + ScriptConfig = make<ScriptConfiguration>(); + + Driver->main(Args, CanExitEarly); + freeArena(); + return !ErrorCount; +} + +// Parses a linker -m option. 
+static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) { + uint8_t OSABI = 0; + StringRef S = Emul; + if (S.endswith("_fbsd")) { + S = S.drop_back(5); + OSABI = ELFOSABI_FREEBSD; + } + + std::pair<ELFKind, uint16_t> Ret = + StringSwitch<std::pair<ELFKind, uint16_t>>(S) + .Cases("aarch64elf", "aarch64linux", {ELF64LEKind, EM_AARCH64}) + .Case("armelf_linux_eabi", {ELF32LEKind, EM_ARM}) + .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64}) + .Case("elf32btsmip", {ELF32BEKind, EM_MIPS}) + .Case("elf32ltsmip", {ELF32LEKind, EM_MIPS}) + .Case("elf32btsmipn32", {ELF32BEKind, EM_MIPS}) + .Case("elf32ltsmipn32", {ELF32LEKind, EM_MIPS}) + .Case("elf32ppc", {ELF32BEKind, EM_PPC}) + .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) + .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) + .Case("elf64ppc", {ELF64BEKind, EM_PPC64}) + .Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64}) + .Case("elf_i386", {ELF32LEKind, EM_386}) + .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU}) + .Default({ELFNoneKind, EM_NONE}); + + if (Ret.first == ELFNoneKind) { + if (S == "i386pe" || S == "i386pep" || S == "thumb2pe") + error("Windows targets are not supported on the ELF frontend: " + Emul); + else + error("unknown emulation: " + Emul); + } + return std::make_tuple(Ret.first, Ret.second, OSABI); +} + +// Returns slices of MB by parsing MB as an archive file. +// Each slice consists of a member file in the archive. 
+std::vector<MemoryBufferRef> +LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { + std::unique_ptr<Archive> File = + check(Archive::create(MB), + MB.getBufferIdentifier() + ": failed to parse archive"); + + std::vector<MemoryBufferRef> V; + Error Err = Error::success(); + for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { + Archive::Child C = + check(COrErr, MB.getBufferIdentifier() + + ": could not get the child of the archive"); + MemoryBufferRef MBRef = + check(C.getMemoryBufferRef(), + MB.getBufferIdentifier() + + ": could not get the buffer for a child of the archive"); + V.push_back(MBRef); + } + if (Err) + fatal(MB.getBufferIdentifier() + ": Archive::children failed: " + + toString(std::move(Err))); + + // Take ownership of memory buffers created for members of thin archives. + for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers()) + make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); + + return V; +} + +// Opens and parses a file. Path has to be resolved already. +// Newly created memory buffers are owned by this driver. 
+void LinkerDriver::addFile(StringRef Path) { + using namespace sys::fs; + + Optional<MemoryBufferRef> Buffer = readFile(Path); + if (!Buffer.hasValue()) + return; + MemoryBufferRef MBRef = *Buffer; + + if (InBinary) { + Files.push_back(make<BinaryFile>(MBRef)); + return; + } + + switch (identify_magic(MBRef.getBuffer())) { + case file_magic::unknown: + readLinkerScript(MBRef); + return; + case file_magic::archive: + if (InWholeArchive) { + for (MemoryBufferRef MB : getArchiveMembers(MBRef)) + Files.push_back(createObjectFile(MB, Path)); + return; + } + Files.push_back(make<ArchiveFile>(MBRef)); + return; + case file_magic::elf_shared_object: + if (Config->Relocatable) { + error("attempted static link of dynamic object " + Path); + return; + } + Files.push_back(createSharedFile(MBRef)); + return; + default: + if (InLib) + Files.push_back(make<LazyObjectFile>(MBRef)); + else + Files.push_back(createObjectFile(MBRef)); + } +} + +// Add a given library by searching it from input search paths. +void LinkerDriver::addLibrary(StringRef Name) { + if (Optional<std::string> Path = searchLibrary(Name)) + addFile(*Path); + else + error("unable to find library -l" + Name); +} + +// This function is called on startup. We need this for LTO since +// LTO calls LLVM functions to compile bitcode files to native code. +// Technically this can be delayed until we read bitcode files, but +// we don't bother to do lazily because the initialization is fast. +static void initLLVM(opt::InputArgList &Args) { + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + + // Parse and evaluate -mllvm options. + std::vector<const char *> V; + V.push_back("lld (LLVM option parsing)"); + for (auto *Arg : Args.filtered(OPT_mllvm)) + V.push_back(Arg->getValue()); + cl::ParseCommandLineOptions(V.size(), V.data()); +} + +// Some command line options or some combinations of them are not allowed. +// This function checks for such errors. 
+static void checkOptions(opt::InputArgList &Args) { + // The MIPS ABI as of 2016 does not support the GNU-style symbol lookup + // table which is a relatively new feature. + if (Config->EMachine == EM_MIPS && Config->GnuHash) + error("the .gnu.hash section is not compatible with the MIPS target."); + + if (Config->Pie && Config->Shared) + error("-shared and -pie may not be used together"); + + if (Config->Relocatable) { + if (Config->Shared) + error("-r and -shared may not be used together"); + if (Config->GcSections) + error("-r and --gc-sections may not be used together"); + if (Config->ICF) + error("-r and --icf may not be used together"); + if (Config->Pie) + error("-r and -pie may not be used together"); + } +} + +static StringRef getString(opt::InputArgList &Args, unsigned Key, + StringRef Default = "") { + if (auto *Arg = Args.getLastArg(Key)) + return Arg->getValue(); + return Default; +} + +static int getInteger(opt::InputArgList &Args, unsigned Key, int Default) { + int V = Default; + if (auto *Arg = Args.getLastArg(Key)) { + StringRef S = Arg->getValue(); + if (S.getAsInteger(10, V)) + error(Arg->getSpelling() + ": number expected, but got " + S); + } + return V; +} + +static const char *getReproduceOption(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_reproduce)) + return Arg->getValue(); + return getenv("LLD_REPRODUCE"); +} + +static bool hasZOption(opt::InputArgList &Args, StringRef Key) { + for (auto *Arg : Args.filtered(OPT_z)) + if (Key == Arg->getValue()) + return true; + return false; +} + +static uint64_t getZOptionValue(opt::InputArgList &Args, StringRef Key, + uint64_t Default) { + for (auto *Arg : Args.filtered(OPT_z)) { + StringRef Value = Arg->getValue(); + size_t Pos = Value.find("="); + if (Pos != StringRef::npos && Key == Value.substr(0, Pos)) { + Value = Value.substr(Pos + 1); + uint64_t Result; + if (Value.getAsInteger(0, Result)) + error("invalid " + Key + ": " + Value); + return Result; + } + } + return Default; +} 
+ +void LinkerDriver::main(ArrayRef<const char *> ArgsArr, bool CanExitEarly) { + ELFOptTable Parser; + opt::InputArgList Args = Parser.parse(ArgsArr.slice(1)); + + // Interpret this flag early because error() depends on them. + Config->ErrorLimit = getInteger(Args, OPT_error_limit, 20); + + // Handle -help + if (Args.hasArg(OPT_help)) { + printHelp(ArgsArr[0]); + return; + } + + // GNU linkers disagree here. Though both -version and -v are mentioned + // in help to print the version information, GNU ld just normally exits, + // while gold can continue linking. We are compatible with ld.bfd here. + if (Args.hasArg(OPT_version) || Args.hasArg(OPT_v)) + outs() << getLLDVersion() << "\n"; + if (Args.hasArg(OPT_version)) + return; + + Config->ExitEarly = CanExitEarly && !Args.hasArg(OPT_full_shutdown); + + if (const char *Path = getReproduceOption(Args)) { + // Note that --reproduce is a debug option so you can ignore it + // if you are trying to understand the whole picture of the code. + Expected<std::unique_ptr<TarWriter>> ErrOrWriter = + TarWriter::create(Path, path::stem(Path)); + if (ErrOrWriter) { + Tar = ErrOrWriter->get(); + Tar->append("response.txt", createResponseFile(Args)); + Tar->append("version.txt", getLLDVersion() + "\n"); + make<std::unique_ptr<TarWriter>>(std::move(*ErrOrWriter)); + } else { + error(Twine("--reproduce: failed to open ") + Path + ": " + + toString(ErrOrWriter.takeError())); + } + } + + readConfigs(Args); + initLLVM(Args); + createFiles(Args); + inferMachineType(); + checkOptions(Args); + if (ErrorCount) + return; + + switch (Config->EKind) { + case ELF32LEKind: + link<ELF32LE>(Args); + return; + case ELF32BEKind: + link<ELF32BE>(Args); + return; + case ELF64LEKind: + link<ELF64LE>(Args); + return; + case ELF64BEKind: + link<ELF64BE>(Args); + return; + default: + llvm_unreachable("unknown Config->EKind"); + } +} + +static UnresolvedPolicy getUnresolvedSymbolOption(opt::InputArgList &Args) { + if (Args.hasArg(OPT_noinhibit_exec)) + 
return UnresolvedPolicy::Warn; + if (Args.hasArg(OPT_no_undefined) || hasZOption(Args, "defs")) + return UnresolvedPolicy::NoUndef; + if (Config->Relocatable) + return UnresolvedPolicy::Ignore; + + if (auto *Arg = Args.getLastArg(OPT_unresolved_symbols)) { + StringRef S = Arg->getValue(); + if (S == "ignore-all" || S == "ignore-in-object-files") + return UnresolvedPolicy::Ignore; + if (S == "ignore-in-shared-libs" || S == "report-all") + return UnresolvedPolicy::ReportError; + error("unknown --unresolved-symbols value: " + S); + } + return UnresolvedPolicy::ReportError; +} + +static Target2Policy getTarget2Option(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_target2)) { + StringRef S = Arg->getValue(); + if (S == "rel") + return Target2Policy::Rel; + if (S == "abs") + return Target2Policy::Abs; + if (S == "got-rel") + return Target2Policy::GotRel; + error("unknown --target2 option: " + S); + } + return Target2Policy::GotRel; +} + +static bool isOutputFormatBinary(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_oformat)) { + StringRef S = Arg->getValue(); + if (S == "binary") + return true; + error("unknown --oformat value: " + S); + } + return false; +} + +static bool getArg(opt::InputArgList &Args, unsigned K1, unsigned K2, + bool Default) { + if (auto *Arg = Args.getLastArg(K1, K2)) + return Arg->getOption().getID() == K1; + return Default; +} + +static DiscardPolicy getDiscardOption(opt::InputArgList &Args) { + if (Config->Relocatable) + return DiscardPolicy::None; + auto *Arg = + Args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none); + if (!Arg) + return DiscardPolicy::Default; + if (Arg->getOption().getID() == OPT_discard_all) + return DiscardPolicy::All; + if (Arg->getOption().getID() == OPT_discard_locals) + return DiscardPolicy::Locals; + return DiscardPolicy::None; +} + +static StripPolicy getStripOption(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_strip_all, OPT_strip_debug)) { + if 
(Arg->getOption().getID() == OPT_strip_all) + return StripPolicy::All; + return StripPolicy::Debug; + } + return StripPolicy::None; +} + +static uint64_t parseSectionAddress(StringRef S, opt::Arg *Arg) { + uint64_t VA = 0; + if (S.startswith("0x")) + S = S.drop_front(2); + if (S.getAsInteger(16, VA)) + error("invalid argument: " + toString(Arg)); + return VA; +} + +static StringMap<uint64_t> getSectionStartMap(opt::InputArgList &Args) { + StringMap<uint64_t> Ret; + for (auto *Arg : Args.filtered(OPT_section_start)) { + StringRef Name; + StringRef Addr; + std::tie(Name, Addr) = StringRef(Arg->getValue()).split('='); + Ret[Name] = parseSectionAddress(Addr, Arg); + } + + if (auto *Arg = Args.getLastArg(OPT_Ttext)) + Ret[".text"] = parseSectionAddress(Arg->getValue(), Arg); + if (auto *Arg = Args.getLastArg(OPT_Tdata)) + Ret[".data"] = parseSectionAddress(Arg->getValue(), Arg); + if (auto *Arg = Args.getLastArg(OPT_Tbss)) + Ret[".bss"] = parseSectionAddress(Arg->getValue(), Arg); + return Ret; +} + +static SortSectionPolicy getSortKind(opt::InputArgList &Args) { + StringRef S = getString(Args, OPT_sort_section); + if (S == "alignment") + return SortSectionPolicy::Alignment; + if (S == "name") + return SortSectionPolicy::Name; + if (!S.empty()) + error("unknown --sort-section rule: " + S); + return SortSectionPolicy::Default; +} + +static std::vector<StringRef> getLines(MemoryBufferRef MB) { + SmallVector<StringRef, 0> Arr; + MB.getBuffer().split(Arr, '\n'); + + std::vector<StringRef> Ret; + for (StringRef S : Arr) { + S = S.trim(); + if (!S.empty()) + Ret.push_back(S); + } + return Ret; +} + +// Initializes Config members by the command line options. 
void LinkerDriver::readConfigs(opt::InputArgList &Args) {
  // NOTE: the order of the assignments below matters in a few places. Some
  // helpers read Config fields that are set earlier in this function (for
  // example getDiscardOption and getUnresolvedSymbolOption both read
  // Config->Relocatable).

  // Library search paths (-L), in command line order.
  for (auto *Arg : Args.filtered(OPT_L))
    Config->SearchPaths.push_back(Arg->getValue());

  // All -rpath values are joined into one ':'-separated string.
  std::vector<StringRef> RPaths;
  for (auto *Arg : Args.filtered(OPT_rpath))
    RPaths.push_back(Arg->getValue());
  if (!RPaths.empty())
    Config->RPath = llvm::join(RPaths.begin(), RPaths.end(), ":");

  if (auto *Arg = Args.getLastArg(OPT_m)) {
    // Parse ELF{32,64}{LE,BE} and CPU type.
    StringRef S = Arg->getValue();
    std::tie(Config->EKind, Config->EMachine, Config->OSABI) =
        parseEmulation(S);
    Config->MipsN32Abi = (S == "elf32btsmipn32" || S == "elf32ltsmipn32");
    Config->Emulation = S;
  }

  // Boolean options. getArg() resolves a pair of opposing flags (last one
  // wins) with the given default.
  Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition);
  Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic);
  Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions);
  Config->Demangle = getArg(Args, OPT_demangle, OPT_no_demangle, true);
  Config->DisableVerify = Args.hasArg(OPT_disable_verify);
  Config->EhFrameHdr = Args.hasArg(OPT_eh_frame_hdr);
  Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags);
  Config->ExportDynamic = Args.hasArg(OPT_export_dynamic);
  Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings);
  Config->GcSections = getArg(Args, OPT_gc_sections, OPT_no_gc_sections, false);
  Config->GdbIndex = Args.hasArg(OPT_gdb_index);
  Config->ICF = Args.hasArg(OPT_icf);
  Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique);
  Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version);
  Config->Nostdlib = Args.hasArg(OPT_nostdlib);
  Config->OMagic = Args.hasArg(OPT_omagic);
  Config->Pie = getArg(Args, OPT_pie, OPT_nopie, false);
  Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections);
  Config->Relocatable = Args.hasArg(OPT_relocatable);
  // Must come after Config->Relocatable is set: getDiscardOption reads it.
  Config->Discard = getDiscardOption(Args);
  Config->SaveTemps = Args.hasArg(OPT_save_temps);
  Config->SingleRoRx = Args.hasArg(OPT_no_rosegment);
  Config->Shared = Args.hasArg(OPT_shared);
  Config->Target1Rel = getArg(Args, OPT_target1_rel, OPT_target1_abs, false);
  Config->Threads = getArg(Args, OPT_threads, OPT_no_threads, true);
  Config->Trace = Args.hasArg(OPT_trace);
  Config->Verbose = Args.hasArg(OPT_verbose);
  Config->WarnCommon = Args.hasArg(OPT_warn_common);

  // String options; the third argument, where present, is the default.
  Config->DynamicLinker = getString(Args, OPT_dynamic_linker);
  Config->Entry = getString(Args, OPT_entry);
  Config->Fini = getString(Args, OPT_fini, "_fini");
  Config->Init = getString(Args, OPT_init, "_init");
  Config->LTOAAPipeline = getString(Args, OPT_lto_aa_pipeline);
  Config->LTONewPmPasses = getString(Args, OPT_lto_newpm_passes);
  Config->OutputFile = getString(Args, OPT_o);
  Config->SoName = getString(Args, OPT_soname);
  Config->Sysroot = getString(Args, OPT_sysroot);

  // Integer options, each validated right after it is read.
  Config->Optimize = getInteger(Args, OPT_O, 1);
  Config->LTOO = getInteger(Args, OPT_lto_O, 2);
  if (Config->LTOO > 3)
    error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O));
  Config->LTOPartitions = getInteger(Args, OPT_lto_partitions, 1);
  if (Config->LTOPartitions == 0)
    error("--lto-partitions: number of threads must be > 0");
  Config->ThinLTOJobs = getInteger(Args, OPT_thinlto_jobs, -1u);
  if (Config->ThinLTOJobs == 0)
    error("--thinlto-jobs: number of threads must be > 0");

  // -z options. ZCombreloc and ZRelro are on unless their "no" form is given.
  Config->ZCombreloc = !hasZOption(Args, "nocombreloc");
  Config->ZExecstack = hasZOption(Args, "execstack");
  Config->ZNodelete = hasZOption(Args, "nodelete");
  Config->ZNow = hasZOption(Args, "now");
  Config->ZOrigin = hasZOption(Args, "origin");
  Config->ZRelro = !hasZOption(Args, "norelro");
  Config->ZStackSize = getZOptionValue(Args, "stack-size", -1);
  Config->ZWxneeded = hasZOption(Args, "wxneeded");

  Config->OFormatBinary = isOutputFormatBinary(Args);
  Config->SectionStartMap = getSectionStartMap(Args);
  Config->SortSection = getSortKind(Args);
  Config->Target2 = getTarget2Option(Args);
  // Reads Config->Relocatable, which was set above.
  Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args);

  // --omagic is an option to create old-fashioned executables in which
  // .text segments are writable. Today, the option is still in use to
  // create special-purpose programs such as boot loaders. It doesn't
  // make sense to create PT_GNU_RELRO for such executables.
  if (Config->OMagic)
    Config->ZRelro = false;

  // The strip options are not consulted for relocatable output;
  // Config->Strip is left as it was.
  if (!Config->Relocatable)
    Config->Strip = getStripOption(Args);

  // Config->Pic is true if we are generating position-independent code.
  Config->Pic = Config->Pie || Config->Shared;

  // --hash-style: "gnu" switches from the SysV table to the GNU one, "both"
  // turns the GNU table on without touching SysvHash, and "sysv" changes
  // nothing.
  if (auto *Arg = Args.getLastArg(OPT_hash_style)) {
    StringRef S = Arg->getValue();
    if (S == "gnu") {
      Config->GnuHash = true;
      Config->SysvHash = false;
    } else if (S == "both") {
      Config->GnuHash = true;
    } else if (S != "sysv")
      error("unknown hash style: " + S);
  }

  // Parse --build-id or --build-id=<style>.
  // The "=<style>" form is handled second, so it overrides a plain
  // --build-id when both are given.
  if (Args.hasArg(OPT_build_id))
    Config->BuildId = BuildIdKind::Fast;
  if (auto *Arg = Args.getLastArg(OPT_build_id_eq)) {
    StringRef S = Arg->getValue();
    if (S == "md5") {
      Config->BuildId = BuildIdKind::Md5;
    } else if (S == "sha1" || S == "tree") {
      Config->BuildId = BuildIdKind::Sha1;
    } else if (S == "uuid") {
      Config->BuildId = BuildIdKind::Uuid;
    } else if (S == "none") {
      Config->BuildId = BuildIdKind::None;
    } else if (S.startswith("0x")) {
      // A user-supplied build ID given as a hex string; the "0x" prefix is
      // dropped before parsing.
      Config->BuildId = BuildIdKind::Hexstring;
      Config->BuildIdVector = parseHex(S.substr(2));
    } else {
      error("unknown --build-id style: " + S);
    }
  }

  for (auto *Arg : Args.filtered(OPT_auxiliary))
    Config->AuxiliaryList.push_back(Arg->getValue());
  if (!Config->Shared && !Config->AuxiliaryList.empty())
    error("-f may not be used without -shared");

  for (auto *Arg : Args.filtered(OPT_undefined))
    Config->Undefined.push_back(Arg->getValue());

  if (auto *Arg = Args.getLastArg(OPT_dynamic_list))
    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
      readDynamicList(*Buffer);

  // The symbol ordering file is read as a list of non-empty, trimmed lines.
  if (auto *Arg = Args.getLastArg(OPT_symbol_ordering_file))
    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
      Config->SymbolOrderingFile = getLines(*Buffer);

  // If --retain-symbols-file is used, we'll retain only the symbols listed in
  // the file and discard all others.
  // Note that the discard policy is switched even if the file cannot be read.
  if (auto *Arg = Args.getLastArg(OPT_retain_symbols_file)) {
    Config->Discard = DiscardPolicy::RetainFile;
    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
      for (StringRef S : getLines(*Buffer))
        Config->RetainSymbolsFile.insert(S);
  }

  for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol))
    Config->VersionScriptGlobals.push_back(
        {Arg->getValue(), /*IsExternCpp*/ false, /*HasWildcard*/ false});

  // Dynamic lists are a simplified linker script that doesn't need the
  // "global:" and implicitly ends with a "local:*". Set the variables needed to
  // simulate that.
  if (Args.hasArg(OPT_dynamic_list) || Args.hasArg(OPT_export_dynamic_symbol)) {
    Config->ExportDynamic = true;
    if (!Config->Shared)
      Config->DefaultSymbolVersion = VER_NDX_LOCAL;
  }

  if (auto *Arg = Args.getLastArg(OPT_version_script))
    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
      readVersionScript(*Buffer);
}

// Returns a value of "-format" option.
+static bool getBinaryOption(StringRef S) { + if (S == "binary") + return true; + if (S == "elf" || S == "default") + return false; + error("unknown -format value: " + S + + " (supported formats: elf, default, binary)"); + return false; +} + +void LinkerDriver::createFiles(opt::InputArgList &Args) { + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_l: + addLibrary(Arg->getValue()); + break; + case OPT_INPUT: + addFile(Arg->getValue()); + break; + case OPT_alias_script_T: + case OPT_script: + if (Optional<MemoryBufferRef> MB = readFile(Arg->getValue())) + readLinkerScript(*MB); + break; + case OPT_as_needed: + Config->AsNeeded = true; + break; + case OPT_format: + InBinary = getBinaryOption(Arg->getValue()); + break; + case OPT_no_as_needed: + Config->AsNeeded = false; + break; + case OPT_Bstatic: + Config->Static = true; + break; + case OPT_Bdynamic: + Config->Static = false; + break; + case OPT_whole_archive: + InWholeArchive = true; + break; + case OPT_no_whole_archive: + InWholeArchive = false; + break; + case OPT_start_lib: + InLib = true; + break; + case OPT_end_lib: + InLib = false; + break; + } + } + + if (Files.empty() && ErrorCount == 0) + error("no input files"); +} + +// If -m <machine_type> was not given, infer it from object files. +void LinkerDriver::inferMachineType() { + if (Config->EKind != ELFNoneKind) + return; + + for (InputFile *F : Files) { + if (F->EKind == ELFNoneKind) + continue; + Config->EKind = F->EKind; + Config->EMachine = F->EMachine; + Config->OSABI = F->OSABI; + Config->MipsN32Abi = Config->EMachine == EM_MIPS && isMipsN32Abi(F); + return; + } + error("target emulation unknown: -m or at least one .o file required"); +} + +// Parse -z max-page-size=<value>. The default value is defined by +// each target. 
+static uint64_t getMaxPageSize(opt::InputArgList &Args) { + uint64_t Val = + getZOptionValue(Args, "max-page-size", Target->DefaultMaxPageSize); + if (!isPowerOf2_64(Val)) + error("max-page-size: value isn't a power of 2"); + return Val; +} + +// Parses -image-base option. +static uint64_t getImageBase(opt::InputArgList &Args) { + // Use default if no -image-base option is given. + // Because we are using "Target" here, this function + // has to be called after the variable is initialized. + auto *Arg = Args.getLastArg(OPT_image_base); + if (!Arg) + return Config->Pic ? 0 : Target->DefaultImageBase; + + StringRef S = Arg->getValue(); + uint64_t V; + if (S.getAsInteger(0, V)) { + error("-image-base: number expected, but got " + S); + return 0; + } + if ((V % Config->MaxPageSize) != 0) + warn("-image-base: address isn't multiple of page size: " + S); + return V; +} + +// Do actual linking. Note that when this function is called, +// all linker scripts have already been parsed. +template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { + SymbolTable<ELFT> Symtab; + elf::Symtab<ELFT>::X = &Symtab; + Target = createTarget(); + ScriptBase = Script<ELFT>::X = make<LinkerScript<ELFT>>(); + + Config->Rela = + ELFT::Is64Bits || Config->EMachine == EM_X86_64 || Config->MipsN32Abi; + Config->Mips64EL = + (Config->EMachine == EM_MIPS && Config->EKind == ELF64LEKind); + Config->MaxPageSize = getMaxPageSize(Args); + Config->ImageBase = getImageBase(Args); + + // Default output filename is "a.out" by the Unix tradition. + if (Config->OutputFile.empty()) + Config->OutputFile = "a.out"; + + // Use default entry point name if no name was given via the command + // line nor linker scripts. For some reason, MIPS entry point name is + // different from others. + Config->WarnMissingEntry = + (!Config->Entry.empty() || (!Config->Shared && !Config->Relocatable)); + if (Config->Entry.empty() && !Config->Relocatable) + Config->Entry = (Config->EMachine == EM_MIPS) ? 
"__start" : "_start"; + + // Handle --trace-symbol. + for (auto *Arg : Args.filtered(OPT_trace_symbol)) + Symtab.trace(Arg->getValue()); + + // Add all files to the symbol table. This will add almost all + // symbols that we need to the symbol table. + for (InputFile *F : Files) + Symtab.addFile(F); + + // If an entry symbol is in a static archive, pull out that file now + // to complete the symbol table. After this, no new names except a + // few linker-synthesized ones will be added to the symbol table. + if (Symtab.find(Config->Entry)) + Symtab.addUndefined(Config->Entry); + + // Return if there were name resolution errors. + if (ErrorCount) + return; + + Symtab.scanUndefinedFlags(); + Symtab.scanShlibUndefined(); + Symtab.scanVersionScript(); + + Symtab.addCombinedLTOObject(); + if (ErrorCount) + return; + + for (auto *Arg : Args.filtered(OPT_wrap)) + Symtab.wrap(Arg->getValue()); + + // Now that we have a complete list of input files. + // Beyond this point, no new files are added. + // Aggregate all input sections into one place. + for (elf::ObjectFile<ELFT> *F : Symtab.getObjectFiles()) + for (InputSectionBase<ELFT> *S : F->getSections()) + if (S && S != &InputSection<ELFT>::Discarded) + Symtab.Sections.push_back(S); + for (BinaryFile *F : Symtab.getBinaryFiles()) + for (InputSectionData *S : F->getSections()) + Symtab.Sections.push_back(cast<InputSection<ELFT>>(S)); + + // Do size optimizations: garbage collection and identical code folding. + if (Config->GcSections) + markLive<ELFT>(); + if (Config->ICF) + doIcf<ELFT>(); + + // MergeInputSection::splitIntoPieces needs to be called before + // any call of MergeInputSection::getOffset. Do that. + forEach(Symtab.Sections.begin(), Symtab.Sections.end(), + [](InputSectionBase<ELFT> *S) { + if (!S->Live) + return; + if (S->isCompressed()) + S->uncompress(); + if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S)) + MS->splitIntoPieces(); + }); + + // Write the result to the file. 
+ writeResult<ELFT>(); +} diff --git a/contrib/llvm/tools/lld/ELF/Driver.h b/contrib/llvm/tools/lld/ELF/Driver.h new file mode 100644 index 000000000000..b600fae34823 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Driver.h @@ -0,0 +1,78 @@ +//===- Driver.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_DRIVER_H +#define LLD_ELF_DRIVER_H + +#include "SymbolTable.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/raw_ostream.h" + +namespace lld { +namespace elf { + +extern class LinkerDriver *Driver; + +class LinkerDriver { +public: + void main(ArrayRef<const char *> Args, bool CanExitEarly); + void addFile(StringRef Path); + void addLibrary(StringRef Name); + +private: + std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef MB); + void readConfigs(llvm::opt::InputArgList &Args); + void createFiles(llvm::opt::InputArgList &Args); + void inferMachineType(); + template <class ELFT> void link(llvm::opt::InputArgList &Args); + + // True if we are in --whole-archive and --no-whole-archive. + bool InWholeArchive = false; + + // True if we are in --start-lib and --end-lib. + bool InLib = false; + + // True if we are in -format=binary and -format=elf. + bool InBinary = false; + + std::vector<InputFile *> Files; +}; + +// Parses command line options. 
+class ELFOptTable : public llvm::opt::OptTable { +public: + ELFOptTable(); + llvm::opt::InputArgList parse(ArrayRef<const char *> Argv); +}; + +// Create enum with OPT_xxx values for each option in Options.td +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID, +#include "ELF/Options.inc" +#undef OPTION +}; + +void printHelp(const char *Argv0); +std::vector<uint8_t> parseHexstring(StringRef S); + +std::string createResponseFile(const llvm::opt::InputArgList &Args); + +llvm::Optional<std::string> findFromSearchPaths(StringRef Path); +llvm::Optional<std::string> searchLibrary(StringRef Path); + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/DriverUtils.cpp b/contrib/llvm/tools/lld/ELF/DriverUtils.cpp new file mode 100644 index 000000000000..ae76958de65e --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/DriverUtils.cpp @@ -0,0 +1,190 @@ +//===- DriverUtils.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains utility functions for the driver. Because there +// are so many small functions, we created this separate file to make +// Driver.cpp less cluttered. 
+// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Error.h" +#include "Memory.h" +#include "ScriptParser.h" +#include "lld/Config/Version.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" + +using namespace llvm; +using namespace llvm::sys; + +using namespace lld; +using namespace lld::elf; + +// Create OptTable + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "ELF/Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const opt::OptTable::Info OptInfo[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ + {X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, \ + X8, X7, OPT_##GROUP, OPT_##ALIAS, X6}, +#include "ELF/Options.inc" +#undef OPTION +}; + +ELFOptTable::ELFOptTable() : OptTable(OptInfo) {} + +// Parse -color-diagnostics={auto,always,never} or -no-color-diagnostics. 
+static bool getColorDiagnostics(opt::InputArgList &Args) { + bool Default = (ErrorOS == &errs() && Process::StandardErrHasColors()); + + auto *Arg = Args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq, + OPT_no_color_diagnostics); + if (!Arg) + return Default; + if (Arg->getOption().getID() == OPT_color_diagnostics) + return true; + if (Arg->getOption().getID() == OPT_no_color_diagnostics) + return false; + + StringRef S = Arg->getValue(); + if (S == "auto") + return Default; + if (S == "always") + return true; + if (S != "never") + error("unknown option: -color-diagnostics=" + S); + return false; +} + +static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_rsp_quoting)) { + StringRef S = Arg->getValue(); + if (S != "windows" && S != "posix") + error("invalid response file quoting: " + S); + if (S == "windows") + return cl::TokenizeWindowsCommandLine; + return cl::TokenizeGNUCommandLine; + } + if (Triple(sys::getProcessTriple()).getOS() == Triple::Win32) + return cl::TokenizeWindowsCommandLine; + return cl::TokenizeGNUCommandLine; +} + +// Parses a given list of options. +opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) { + // Make InputArgList from string vectors. + unsigned MissingIndex; + unsigned MissingCount; + SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size()); + + // We need to get the quoting style for response files before parsing all + // options so we parse here before and ignore all the options but + // --rsp-quoting. + opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount); + + // Expand response files (arguments in the form of @<filename>) + // and then parse the argument again. + cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Vec); + Args = this->ParseArgs(Vec, MissingIndex, MissingCount); + + // Interpret -color-diagnostics early so that error messages + // for unknown flags are colored. 
+ Config->ColorDiagnostics = getColorDiagnostics(Args); + if (MissingCount) + error(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); + + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + error("unknown argument: " + Arg->getSpelling()); + return Args; +} + +void elf::printHelp(const char *Argv0) { + ELFOptTable Table; + Table.PrintHelp(outs(), Argv0, "lld", false); +} + +// Reconstructs command line arguments so that so that you can re-run +// the same command with the same inputs. This is for --reproduce. +std::string elf::createResponseFile(const opt::InputArgList &Args) { + SmallString<0> Data; + raw_svector_ostream OS(Data); + + // Copy the command line to the output while rewriting paths. + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_reproduce: + break; + case OPT_INPUT: + OS << quote(rewritePath(Arg->getValue())) << "\n"; + break; + case OPT_L: + case OPT_dynamic_list: + case OPT_rpath: + case OPT_alias_script_T: + case OPT_script: + case OPT_version_script: + OS << Arg->getSpelling() << " " << quote(rewritePath(Arg->getValue())) + << "\n"; + break; + default: + OS << toString(Arg) << "\n"; + } + } + return Data.str(); +} + +// Find a file by concatenating given paths. If a resulting path +// starts with "=", the character is replaced with a --sysroot value. +static Optional<std::string> findFile(StringRef Path1, const Twine &Path2) { + SmallString<128> S; + if (Path1.startswith("=")) + path::append(S, Config->Sysroot, Path1.substr(1), Path2); + else + path::append(S, Path1, Path2); + + if (fs::exists(S)) + return S.str().str(); + return None; +} + +Optional<std::string> elf::findFromSearchPaths(StringRef Path) { + for (StringRef Dir : Config->SearchPaths) + if (Optional<std::string> S = findFile(Dir, Path)) + return S; + return None; +} + +// This is for -lfoo. We'll look for libfoo.so or libfoo.a from +// search paths. 
+Optional<std::string> elf::searchLibrary(StringRef Name) { + if (Name.startswith(":")) + return findFromSearchPaths(Name.substr(1)); + + for (StringRef Dir : Config->SearchPaths) { + if (!Config->Static) + if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".so")) + return S; + if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".a")) + return S; + } + return None; +} diff --git a/contrib/llvm/tools/lld/ELF/EhFrame.cpp b/contrib/llvm/tools/lld/ELF/EhFrame.cpp new file mode 100644 index 000000000000..2428473d9012 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/EhFrame.cpp @@ -0,0 +1,214 @@ +//===- EhFrame.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// .eh_frame section contains information on how to unwind the stack when +// an exception is thrown. The section consists of sequence of CIE and FDE +// records. The linker needs to merge CIEs and associate FDEs to CIEs. +// That means the linker has to understand the format of the section. +// +// This file contains a few utility functions to read .eh_frame contents. 
+// +//===----------------------------------------------------------------------===// + +#include "EhFrame.h" +#include "Error.h" +#include "InputSection.h" +#include "Relocations.h" +#include "Strings.h" + +#include "llvm/Object/ELF.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::dwarf; +using namespace llvm::object; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +// EhReader is a cursor over the bytes D, which belong to input section IS. +// The read*/skip* helpers consume bytes from the front of D and report a +// fatal error, located relative to IS, when the data is malformed. +namespace { +template <class ELFT> class EhReader { +public: + EhReader(InputSectionBase<ELFT> *S, ArrayRef<uint8_t> D) : IS(S), D(D) {} + size_t readEhRecordSize(); + uint8_t getFdeEncoding(); + +private: + // Reports a fatal error whose location is the offset of Loc in IS->Data. + template <class P> void failOn(const P *Loc, const Twine &Msg) { + fatal(IS->getLocation((const uint8_t *)Loc - IS->Data.data()) + ": " + Msg); + } + + uint8_t readByte(); + void skipBytes(size_t Count); + StringRef readString(); + void skipLeb128(); + void skipAugP(); + + InputSectionBase<ELFT> *IS; + ArrayRef<uint8_t> D; +}; +} + +// Returns the size of the record that starts at offset Off of section S. +template <class ELFT> +size_t elf::readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off) { + return EhReader<ELFT>(S, S->Data.slice(Off)).readEhRecordSize(); +} +// .eh_frame section is a sequence of records. Each record starts with +// a 4 byte length field. This function reads the length and returns the +// size of the whole record, i.e. the length plus the 4 bytes of the +// length field itself. +template <class ELFT> size_t EhReader<ELFT>::readEhRecordSize() { + const endianness E = ELFT::TargetEndianness; + if (D.size() < 4) + failOn(D.data(), "CIE/FDE too small"); + + // First 4 bytes of CIE/FDE is the size of the record. + // If it is 0xFFFFFFFF, the next 8 bytes contain the size instead, + // but we do not support that format yet. + uint64_t V = read32<E>(D.data()); + if (V == UINT32_MAX) + failOn(D.data(), "CIE/FDE too large"); + uint64_t Size = V + 4; + if (Size > D.size()) + failOn(D.data(), "CIE/FDE ends past the end of the section"); + return Size; +} + +// Read a byte and advance D by one byte. 
+template <class ELFT> uint8_t EhReader<ELFT>::readByte() { + if (D.empty()) + failOn(D.data(), "unexpected end of CIE"); + uint8_t B = D.front(); + D = D.slice(1); + return B; +} + +template <class ELFT> void EhReader<ELFT>::skipBytes(size_t Count) { + if (D.size() < Count) + failOn(D.data(), "CIE is too small"); + D = D.slice(Count); +} + +// Read a null-terminated string and advance D past its terminator. +// The returned StringRef does not include the terminator. +template <class ELFT> StringRef EhReader<ELFT>::readString() { + const uint8_t *End = std::find(D.begin(), D.end(), '\0'); + if (End == D.end()) + failOn(D.data(), "corrupted CIE (failed to read string)"); + StringRef S = toStringRef(D.slice(0, End - D.begin())); + D = D.slice(S.size() + 1); + return S; +} + +// Skip an integer encoded in the LEB128 format. +// Actual number is not of interest because only the runtime needs it. +// But we need to be at least able to skip it so that we can read +// the field that follows a LEB128 number. +template <class ELFT> void EhReader<ELFT>::skipLeb128() { + const uint8_t *ErrPos = D.data(); + while (!D.empty()) { + uint8_t Val = D.front(); + D = D.slice(1); + // The number ends at the first byte whose high bit is clear. + if ((Val & 0x80) == 0) + return; + } + failOn(ErrPos, "corrupted CIE (failed to read LEB128)"); +} + +// Returns the size in bytes of a value encoded with Enc (only the low +// 4 bits of Enc are examined), or 0 if the encoding is not recognized. +template <class ELFT> static size_t getAugPSize(unsigned Enc) { + switch (Enc & 0x0f) { + case DW_EH_PE_absptr: + case DW_EH_PE_signed: + return ELFT::Is64Bits ? 
8 : 4; + case DW_EH_PE_udata2: + case DW_EH_PE_sdata2: + return 2; + case DW_EH_PE_udata4: + case DW_EH_PE_sdata4: + return 4; + case DW_EH_PE_udata8: + case DW_EH_PE_sdata8: + return 8; + } + return 0; +} + +template <class ELFT> void EhReader<ELFT>::skipAugP() { + uint8_t Enc = readByte(); + if ((Enc & 0xf0) == DW_EH_PE_aligned) + failOn(D.data() - 1, "DW_EH_PE_aligned encoding is not supported"); + size_t Size = getAugPSize<ELFT>(Enc); + if (Size == 0) + failOn(D.data() - 1, "unknown FDE encoding"); + if (Size >= D.size()) + failOn(D.data() - 1, "corrupted CIE"); + D = D.slice(Size); +} + +template <class ELFT> uint8_t elf::getFdeEncoding(EhSectionPiece *P) { + auto *IS = static_cast<InputSectionBase<ELFT> *>(P->ID); + return EhReader<ELFT>(IS, P->data()).getFdeEncoding(); +} + +template <class ELFT> uint8_t EhReader<ELFT>::getFdeEncoding() { + skipBytes(8); + int Version = readByte(); + if (Version != 1 && Version != 3) + failOn(D.data() - 1, + "FDE version 1 or 3 expected, but got " + Twine(Version)); + + StringRef Aug = readString(); + + // Skip code and data alignment factors. + skipLeb128(); + skipLeb128(); + + // Skip the return address register. In CIE version 1 this is a single + // byte. In CIE version 3 this is an unsigned LEB128. + if (Version == 1) + readByte(); + else + skipLeb128(); + + // We only care about an 'R' value, but other records may precede an 'R' + // record. Unfortunately records are not in TLV (type-length-value) format, + // so we need to teach the linker how to skip records for each type. + // If the augmentation string has no 'R', DW_EH_PE_absptr is returned. 
+ for (char C : Aug) { + if (C == 'R') + return readByte(); + if (C == 'z') { + skipLeb128(); + continue; + } + if (C == 'P') { + skipAugP(); + continue; + } + if (C == 'L') { + readByte(); + continue; + } + failOn(Aug.data(), "unknown .eh_frame augmentation string: " + Aug); + } + return DW_EH_PE_absptr; +} + +template size_t elf::readEhRecordSize<ELF32LE>(InputSectionBase<ELF32LE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF32BE>(InputSectionBase<ELF32BE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF64LE>(InputSectionBase<ELF64LE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF64BE>(InputSectionBase<ELF64BE> *S, + size_t Off); + +template uint8_t elf::getFdeEncoding<ELF32LE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF32BE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF64LE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF64BE>(EhSectionPiece *P); diff --git a/contrib/llvm/tools/lld/ELF/EhFrame.h b/contrib/llvm/tools/lld/ELF/EhFrame.h new file mode 100644 index 000000000000..cadc93d3a2e4 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/EhFrame.h @@ -0,0 +1,26 @@ +//===- EhFrame.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_EHFRAME_H +#define LLD_ELF_EHFRAME_H + +#include "lld/Core/LLVM.h" + +namespace lld { +namespace elf { +template <class ELFT> class InputSectionBase; +struct EhSectionPiece; + +// Returns the size in bytes, including the 4-byte length field, of the +// .eh_frame record that starts at offset Off of section S. +template <class ELFT> +size_t readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off); +// Returns the encoding byte stored for the 'R' augmentation of the CIE P, +// or DW_EH_PE_absptr if the CIE has no 'R' augmentation. +template <class ELFT> uint8_t getFdeEncoding(EhSectionPiece *P); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Error.cpp b/contrib/llvm/tools/lld/ELF/Error.cpp new file mode 100644 index 000000000000..6e30f08143ed --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Error.cpp @@ -0,0 +1,106 @@ +//===- Error.cpp ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Error.h" +#include "Config.h" + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +#include <mutex> + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#endif + +using namespace lld::elf; +using namespace llvm; + +namespace lld { + +// ErrorCount is incremented by every call to error(). ErrorOS is the stream +// that warnings and errors are written to, and Argv0 is printed as the +// prefix of every message. +uint64_t elf::ErrorCount; +raw_ostream *elf::ErrorOS; +StringRef elf::Argv0; + +// The functions defined in this file can be called from multiple threads, +// but outs() or errs() are not thread-safe. We protect them using a mutex. 
+static std::mutex Mu; + +static void print(StringRef S, raw_ostream::Colors C) { + *ErrorOS << Argv0 + ": "; + if (Config->ColorDiagnostics) { + ErrorOS->changeColor(C, true); + *ErrorOS << S; + ErrorOS->resetColor(); + } else { + *ErrorOS << S; + } +} + +void elf::log(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + if (Config->Verbose) + outs() << Argv0 << ": " << Msg << "\n"; +} + +void elf::warn(const Twine &Msg) { + if (Config->FatalWarnings) { + error(Msg); + return; + } + std::lock_guard<std::mutex> Lock(Mu); + print("warning: ", raw_ostream::MAGENTA); + *ErrorOS << Msg << "\n"; +} + +void elf::error(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + + if (Config->ErrorLimit == 0 || ErrorCount < Config->ErrorLimit) { + print("error: ", raw_ostream::RED); + *ErrorOS << Msg << "\n"; + } else if (ErrorCount == Config->ErrorLimit) { + print("error: ", raw_ostream::RED); + *ErrorOS << "too many errors emitted, stopping now" + << " (use -error-limit=0 to see all errors)\n"; + if (Config->ExitEarly) + exitLld(1); + } + + ++ErrorCount; +} + +void elf::error(std::error_code EC, const Twine &Prefix) { + error(Prefix + ": " + EC.message()); +} + +void elf::exitLld(int Val) { + // Dealloc/destroy ManagedStatic variables before calling + // _exit(). In a non-LTO build, this is a nop. In an LTO + // build, this allows us to get the output of -time-passes. + // outs() and errs() are then flushed explicitly before _exit(). 
+ llvm_shutdown(); + + outs().flush(); + errs().flush(); + _exit(Val); +} + +void elf::fatal(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + print("error: ", raw_ostream::RED); + *ErrorOS << Msg << "\n"; + exitLld(1); +} + +void elf::fatal(std::error_code EC, const Twine &Prefix) { + fatal(Prefix + ": " + EC.message()); +} + +} // namespace lld diff --git a/contrib/llvm/tools/lld/ELF/Error.h b/contrib/llvm/tools/lld/ELF/Error.h new file mode 100644 index 000000000000..1ec683595cf4 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Error.h @@ -0,0 +1,81 @@ +//===- Error.h --------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// In LLD, we have three levels of errors: fatal, error or warn. +// +// Fatal makes the program exit immediately with an error message. +// You shouldn't use it except for reporting a corrupted input file. +// +// Error prints out an error message and increments a global variable +// ErrorCount to record the fact that we met an error condition. It does +// not exit (unless the error limit is reached and Config->ExitEarly is +// set), so it is safe for a lld-as-a-library use case. It is generally +// useful because it can report more than one error in a single run. +// +// Warn only prints out a given message, unless Config->FatalWarnings is +// set, in which case the message is reported as an error instead. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ERROR_H +#define LLD_ELF_ERROR_H + +#include "lld/Core/LLVM.h" + +#include "llvm/Support/Error.h" + +namespace lld { +namespace elf { + +extern uint64_t ErrorCount; +extern llvm::raw_ostream *ErrorOS; +extern llvm::StringRef Argv0; + +void log(const Twine &Msg); +void warn(const Twine &Msg); + +void error(const Twine &Msg); +void error(std::error_code EC, const Twine &Prefix); + +LLVM_ATTRIBUTE_NORETURN void exitLld(int Val); +LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix); + +// check() functions are convenient functions to strip errors +// from error-or-value objects. +template <class T> T check(ErrorOr<T> E) { + if (auto EC = E.getError()) + fatal(EC.message()); + return std::move(*E); +} + +template <class T> T check(Expected<T> E) { + if (!E) + handleAllErrors(std::move(E.takeError()), + [](llvm::ErrorInfoBase &EIB) -> Error { + fatal(EIB.message()); + return Error::success(); + }); + return std::move(*E); +} + +template <class T> T check(ErrorOr<T> E, const Twine &Prefix) { + if (auto EC = E.getError()) + fatal(Prefix + ": " + EC.message()); + return std::move(*E); +} + +template <class T> T check(Expected<T> E, const Twine &Prefix) { + if (!E) + fatal(Prefix + ": " + errorToErrorCode(E.takeError()).message()); + return std::move(*E); +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/GdbIndex.cpp b/contrib/llvm/tools/lld/ELF/GdbIndex.cpp new file mode 100644 index 000000000000..762144dd0a96 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/GdbIndex.cpp @@ -0,0 +1,205 @@ +//===- GdbIndex.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains classes that implement the --gdb-index command line +// option. +// +// If that option is used, the linker should emit a .gdb_index section that +// allows a debugger to locate and read .dwo files containing the necessary +// debug information. +// More information about the implementation can be found in the DWARF +// specification; the latest version is available at http://dwarfstd.org. +// +// .gdb_index section format: +// (Information is based on/taken from +// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html (*)) +// +// A mapped index consists of several areas, laid out in order: +// 1) The file header. +// 2) "The CU (compilation unit) list. This is a sequence of pairs of 64-bit +// little-endian values, sorted by the CU offset. The first element in each +// pair is the offset of a CU in the .debug_info section. The second element +// in each pair is the length of that CU. References to a CU elsewhere in the +// map are done using a CU index, which is just the 0-based index into this +// table. Note that if there are type CUs, then conceptually CUs and type CUs +// form a single list for the purposes of CU indices."(*) +// 3) The types CU list. Deprecated, as .debug_types does not appear in the +// DWARF v5 specification. +// 4) The address area. The address area is a sequence of address +// entries, where each entry contains a low address, a high address and a +// CU index. +// 5) "The symbol table. This is an open-addressed hash table. The size of the +// hash table is always a power of 2. Each slot in the hash table consists of +// a pair of offset_type values. The first value is the offset of the +// symbol's name in the constant pool. The second value is the offset of the +// CU vector in the constant pool."(*) +// 6) "The constant pool. This is simply a bunch of bytes. 
It is organized so +// that alignment is correct: CU vectors are stored first, followed by +// strings." (*) +// +// To construct the .gdb_index section, the following steps should be +// performed: +// 1) For the file header nothing special should be done. It contains the +// offsets to the areas below. +// 2) Scan the compilation unit headers of the .debug_info sections to build a +// list of compilation units. +// 3) CU Types are no longer needed as DWARF skeleton type units never made it +// into the standard. lld does nothing to support parsing of .debug_types +// and generates an empty types CU area in the .gdb_index section. +// 4) Address area entries are extracted from DW_TAG_compile_unit DIEs of +// .debug_info sections. +// 5) To build the symbol table, the linker extracts the public names from the +// .debug_gnu_pubnames and .debug_gnu_pubtypes sections. Then it builds the +// hash table according to the .gdb_index format specification. +// 6) The constant pool is populated at the same time as the symbol table. 
+//===----------------------------------------------------------------------===// + +#include "GdbIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" +#include "llvm/Object/ELFObjectFile.h" + +using namespace llvm; +using namespace llvm::object; +using namespace lld::elf; + +template <class ELFT> +GdbIndexBuilder<ELFT>::GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec) + : DebugInfoSec(DebugInfoSec) { + if (Expected<std::unique_ptr<object::ObjectFile>> Obj = + object::ObjectFile::createObjectFile(DebugInfoSec->getFile()->MB)) + Dwarf.reset(new DWARFContextInMemory(*Obj.get(), this)); + else + error(toString(DebugInfoSec->getFile()) + ": error creating DWARF context"); +} + +template <class ELFT> +std::vector<std::pair<typename ELFT::uint, typename ELFT::uint>> +GdbIndexBuilder<ELFT>::readCUList() { + std::vector<std::pair<uintX_t, uintX_t>> Ret; + for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf->compile_units()) + Ret.push_back( + {DebugInfoSec->OutSecOff + CU->getOffset(), CU->getLength() + 4}); + return Ret; +} + +template <class ELFT> +std::vector<std::pair<StringRef, uint8_t>> +GdbIndexBuilder<ELFT>::readPubNamesAndTypes() { + const bool IsLE = ELFT::TargetEndianness == llvm::support::little; + StringRef Data[] = {Dwarf->getGnuPubNamesSection(), + Dwarf->getGnuPubTypesSection()}; + + std::vector<std::pair<StringRef, uint8_t>> Ret; + for (StringRef D : Data) { + DWARFDebugPubTable PubTable(D, IsLE, true); + for (const DWARFDebugPubTable::Set &S : PubTable.getData()) + for (const DWARFDebugPubTable::Entry &E : S.Entries) + Ret.push_back({E.Name, E.Descriptor.toBits()}); + } + return Ret; +} + +std::pair<bool, GdbSymbol *> GdbHashTab::add(uint32_t Hash, size_t Offset) { + if (Size * 4 / 3 >= Table.size()) + expand(); + + GdbSymbol **Slot = findSlot(Hash, Offset); + bool New = false; + if (*Slot == nullptr) { + ++Size; + *Slot = new (Alloc) GdbSymbol(Hash, Offset); + New = true; + } + return {New, *Slot}; +} + +void GdbHashTab::expand() { + if 
(Table.empty()) { + Table.resize(InitialSize); + return; + } + std::vector<GdbSymbol *> NewTable(Table.size() * 2); + NewTable.swap(Table); + + for (GdbSymbol *Sym : NewTable) { + if (!Sym) + continue; + GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset); + *Slot = Sym; + } +} + +// Methods finds a slot for symbol with given hash. The step size used to find +// the next candidate slot when handling a hash collision is specified in +// .gdb_index section format. The hash value for a table entry is computed by +// applying an iterative hash function to the symbol's name. +GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) { + uint32_t Index = Hash & (Table.size() - 1); + uint32_t Step = ((Hash * 17) & (Table.size() - 1)) | 1; + + for (;;) { + GdbSymbol *S = Table[Index]; + if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash))) + return &Table[Index]; + Index = (Index + Step) & (Table.size() - 1); + } +} + +template <class ELFT> +static InputSectionBase<ELFT> * +findSection(ArrayRef<InputSectionBase<ELFT> *> Arr, uint64_t Offset) { + for (InputSectionBase<ELFT> *S : Arr) + if (S && S != &InputSection<ELFT>::Discarded) + if (Offset >= S->Offset && Offset < S->Offset + S->getSize()) + return S; + return nullptr; +} + +template <class ELFT> +std::vector<AddressEntry<ELFT>> +GdbIndexBuilder<ELFT>::readAddressArea(size_t CurrentCU) { + std::vector<AddressEntry<ELFT>> Ret; + for (const auto &CU : Dwarf->compile_units()) { + DWARFAddressRangesVector Ranges; + CU->collectAddressRanges(Ranges); + + ArrayRef<InputSectionBase<ELFT> *> Sections = + DebugInfoSec->getFile()->getSections(); + + for (std::pair<uint64_t, uint64_t> &R : Ranges) + if (InputSectionBase<ELFT> *S = findSection(Sections, R.first)) + Ret.push_back( + {S, R.first - S->Offset, R.second - S->Offset, CurrentCU}); + ++CurrentCU; + } + return Ret; +} + +// We return file offset as load address for allocatable sections. 
That is +// currently used for collecting address ranges in readAddressArea(). It +// lets us find the section that a range belongs to. Sections without +// SHF_ALLOC get a load address of 0. +template <class ELFT> +uint64_t GdbIndexBuilder<ELFT>::getSectionLoadAddress( + const object::SectionRef &Sec) const { + if (static_cast<const ELFSectionRef &>(Sec).getFlags() & ELF::SHF_ALLOC) + return static_cast<const ELFSectionRef &>(Sec).getOffset(); + return 0; +} + +// Overrides LoadedObjectInfo::clone(). This implementation returns a null +// pointer. +template <class ELFT> +std::unique_ptr<LoadedObjectInfo> GdbIndexBuilder<ELFT>::clone() const { + return {}; +} + +namespace lld { +namespace elf { +template class GdbIndexBuilder<ELF32LE>; +template class GdbIndexBuilder<ELF32BE>; +template class GdbIndexBuilder<ELF64LE>; +template class GdbIndexBuilder<ELF64BE>; +} +} diff --git a/contrib/llvm/tools/lld/ELF/GdbIndex.h b/contrib/llvm/tools/lld/ELF/GdbIndex.h new file mode 100644 index 000000000000..c761ea173a8d --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/GdbIndex.h @@ -0,0 +1,99 @@ +//===- GdbIndex.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// + +#ifndef LLD_ELF_GDB_INDEX_H +#define LLD_ELF_GDB_INDEX_H + +#include "InputFiles.h" +#include "llvm/Object/ELF.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" + +namespace lld { +namespace elf { + +template <class ELFT> class InputSection; + +// Represents a single entry of the address area of the gdb index. +// LowAddress and HighAddress are relative to the Offset of Section. +template <class ELFT> struct AddressEntry { + InputSectionBase<ELFT> *Section; + uint64_t LowAddress; + uint64_t HighAddress; + size_t CuIndex; +}; + +// GdbIndexBuilder is a helper class used for extracting data required +// for building .gdb_index section from objects. 
+template <class ELFT> class GdbIndexBuilder : public llvm::LoadedObjectInfo { + typedef typename ELFT::uint uintX_t; + + InputSection<ELFT> *DebugInfoSec; + + std::unique_ptr<llvm::DWARFContext> Dwarf; + +public: + GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec); + + // Extracts the compilation units. The first element of each pair is the + // offset of a CU in the .debug_info section and the second is the length of + // that CU. + std::vector<std::pair<uintX_t, uintX_t>> readCUList(); + + // Extracts the vector of address area entries. CurrentCU is the global CU + // index to record for the first CU of this object; following CUs get + // consecutive indices. + std::vector<AddressEntry<ELFT>> readAddressArea(size_t CurrentCU); + + // Extracts public names and types. Returns a list of name and + // gnu_pub* kind pairs. + std::vector<std::pair<StringRef, uint8_t>> readPubNamesAndTypes(); + +private: + // Returns the section file offset as a load address for the DWARF parser. + // That allows us to find the target section for address ranges. + uint64_t + getSectionLoadAddress(const llvm::object::SectionRef &Sec) const override; + std::unique_ptr<llvm::LoadedObjectInfo> clone() const override; +}; + +// Element of GdbHashTab hash table. +struct GdbSymbol { + GdbSymbol(uint32_t Hash, size_t Offset) + : NameHash(Hash), NameOffset(Offset) {} + uint32_t NameHash; + size_t NameOffset; + // Not set by the constructor; default to 0 so that the member never holds + // an indeterminate value. + size_t CuVectorIndex = 0; +}; + +// This class manages the hashed symbol table for the .gdb_index section. +// The hash value for a table entry is computed by applying an iterative hash +// function to the symbol's name. +class GdbHashTab final { +public: + // Returns the symbol for (Hash, Offset), creating it if necessary. The + // bool is true if the symbol was newly created. + std::pair<bool, GdbSymbol *> add(uint32_t Hash, size_t Offset); + + // Number of slots in the table, and the (possibly null) symbol in slot I. + size_t getCapacity() { return Table.size(); } + GdbSymbol *getSymbol(size_t I) { return Table[I]; } + +private: + void expand(); + + GdbSymbol **findSlot(uint32_t Hash, size_t Offset); + + llvm::BumpPtrAllocator Alloc; + std::vector<GdbSymbol *> Table; + + // Size keeps the amount of filled entries in Table. 
+ size_t Size = 0; + + // Initial size must be a power of 2. + static const int32_t InitialSize = 1024; +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/ICF.cpp b/contrib/llvm/tools/lld/ELF/ICF.cpp new file mode 100644 index 000000000000..32cd0f8a185c --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/ICF.cpp @@ -0,0 +1,383 @@ +//===- ICF.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// ICF is short for Identical Code Folding. This is a size optimization to +// identify and merge two or more read-only sections (typically functions) +// that happen to have the same contents. It usually reduces output size +// by a few percent. +// +// In ICF, two sections are considered identical if they have the same +// section flags, section data, and relocations. Relocations are tricky, +// because two relocations are considered the same if they have the same +// relocation types, values, and if they point to the same sections *in +// terms of ICF*. +// +// Here is an example. If foo and bar defined below are compiled to the +// same machine instructions, ICF can and should merge the two, although +// their relocations point to each other. +// +// void foo() { bar(); } +// void bar() { foo(); } +// +// If you merge the two, their relocations point to the same section and +// thus you know they are mergeable, but how do you know they are +// mergeable in the first place? This is not an easy problem to solve. +// +// What we are doing in LLD is to partition sections into equivalence +// classes. Sections in the same equivalence class when the algorithm +// terminates are considered identical. Here are details: +// +// 1. First, we partition sections using their hash values as keys. 
Hash +// values are computed from section flags, section sizes and numbers of +// relocations. During this step, relocation targets are not taken into +// account. We just put sections that apparently differ into different +// equivalence classes. +// +// 2. Next, for each equivalence class, we visit sections to compare +// relocation targets. Relocation targets are considered equivalent if +// their targets are in the same equivalence class. Sections with +// different relocation targets are put into different equivalence +// classes. +// +// 3. If we split an equivalence class in step 2, two relocations +// that previously targeted the same equivalence class may now target +// different equivalence classes. Therefore, we repeat step 2 until a +// convergence is obtained. +// +// 4. For each equivalence class C, pick an arbitrary section in C, and +// merge all the other sections in C with it. +// +// For small programs, this algorithm needs 3-5 iterations. For large +// programs such as Chromium, it takes more than 20 iterations. +// +// This algorithm was mentioned as an "optimistic algorithm" in [1], +// though gold implements a different algorithm than this. +// +// We parallelize each step so that multiple threads can work on different +// equivalence classes concurrently. That gave us a large performance +// boost when applying ICF on large programs. For example, MSVC link.exe +// or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output +// size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a +// 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still +// faster than MSVC or gold though. 
+// +// [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding +// in the Gold Linker +// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf +// +//===----------------------------------------------------------------------===// + +#include "ICF.h" +#include "Config.h" +#include "SymbolTable.h" +#include "Threads.h" + +#include "llvm/ADT/Hashing.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include <algorithm> +#include <atomic> + +using namespace lld; +using namespace lld::elf; +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; + +namespace { +template <class ELFT> class ICF { +public: + void run(); + +private: + void segregate(size_t Begin, size_t End, bool Constant); + + template <class RelTy> + bool constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB); + + template <class RelTy> + bool variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, + const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB); + + bool equalsConstant(const InputSection<ELFT> *A, const InputSection<ELFT> *B); + bool equalsVariable(const InputSection<ELFT> *A, const InputSection<ELFT> *B); + + size_t findBoundary(size_t Begin, size_t End); + + void forEachClassRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn); + + void forEachClass(std::function<void(size_t, size_t)> Fn); + + // Sections that are subject to ICF. + std::vector<InputSection<ELFT> *> Sections; + + // We repeat the main loop while `Repeat` is true. + std::atomic<bool> Repeat; + + // The main loop counter. + int Cnt = 0; + + // We have two locations for equivalence classes. On the first iteration + // of the main loop, Class[0] has a valid value, and Class[1] contains + // garbage. We read equivalence classes from slot 0 and write to slot 1. + // So, Class[0] represents the current class, and Class[1] represents + // the next class. On each iteration, we switch their roles and use them + // alternately. + // + // Why are we doing this? 
Recall that other threads may be working on + // other equivalence classes in parallel. They may read sections that we + // are updating. We cannot update equivalence classes in place because + // it breaks the invariant that all possibly-identical sections must be + // in the same equivalence class at any moment. In other words, the for + // loop to update equivalence classes is not atomic, and that is + // observable from other threads. By writing new classes to other + // places, we can keep the invariant. + // + // Below, `Current` has the index of the current class, and `Next` has + // the index of the next class. If threading is enabled, they are either + // (0, 1) or (1, 0). + // + // Note on single-thread: if that's the case, they are always (0, 0) + // because we can safely read the next class without worrying about race + // conditions. Using the same location makes this algorithm converge + // faster because it uses results of the same iteration earlier. + int Current = 0; + int Next = 0; +}; +} + +// Returns a hash value for S, computed from its flags, size and number of +// relocations. Note that the information about relocation targets is not +// included in the hash value. +template <class ELFT> static uint32_t getHash(InputSection<ELFT> *S) { + return hash_combine(S->Flags, S->getSize(), S->NumRelocations); +} + +// Returns true if section S is subject to ICF. Only live, allocated, +// non-writable sections are eligible. +template <class ELFT> static bool isEligible(InputSection<ELFT> *S) { + // .init and .fini contain instructions that must be executed to + // initialize and finalize the process. They cannot and should not + // be merged. + return S->Live && (S->Flags & SHF_ALLOC) && !(S->Flags & SHF_WRITE) && + S->Name != ".init" && S->Name != ".fini"; +} + +// Split an equivalence class into smaller classes. Constant selects +// equalsConstant (true) or equalsVariable (false) as the comparison. +template <class ELFT> +void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) { + // This loop rearranges sections in [Begin, End) so that all sections + // that are equal in terms of equals{Constant,Variable} are contiguous + // in [Begin, End). 
+ // + // The algorithm is quadratic in the worst case, but that is not an + // issue in practice because the number of the distinct sections in + // each range is usually very small. + + while (Begin < End) { + // Divide [Begin, End) into two. Let Mid be the start index of the + // second group. Sections[Begin] itself always belongs to the first + // group, so the partition starts at Begin + 1. + auto Bound = std::stable_partition( + Sections.begin() + Begin + 1, Sections.begin() + End, + [&](InputSection<ELFT> *S) { + if (Constant) + return equalsConstant(Sections[Begin], S); + return equalsVariable(Sections[Begin], S); + }); + size_t Mid = Bound - Sections.begin(); + + // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by + // updating the sections in [Begin, Mid). We use Mid as an equivalence + // class ID because every group ends with a unique index. + for (size_t I = Begin; I < Mid; ++I) + Sections[I]->Class[Next] = Mid; + + // If we created a group, we need to iterate the main loop again. + if (Mid != End) + Repeat = true; + + Begin = Mid; + } +} + +// Compare two lists of relocations by offset, type and addend. Relocation +// targets are not compared here. +template <class ELFT> +template <class RelTy> +bool ICF<ELFT>::constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { + auto Eq = [](const RelTy &A, const RelTy &B) { + return A.r_offset == B.r_offset && + A.getType(Config->Mips64EL) == B.getType(Config->Mips64EL) && + getAddend<ELFT>(A) == getAddend<ELFT>(B); + }; + + return RelsA.size() == RelsB.size() && + std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); +} + +// Compare "non-moving" part of two InputSections, namely everything +// except relocation targets. +template <class ELFT> +bool ICF<ELFT>::equalsConstant(const InputSection<ELFT> *A, + const InputSection<ELFT> *B) { + if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags || + A->getSize() != B->getSize() || A->Data != B->Data) + return false; + + if (A->AreRelocsRela) + return constantEq(A->relas(), B->relas()); + return constantEq(A->rels(), B->rels()); +} + +// Compare two lists of relocations. 
Returns true if all pairs of +// relocations point to the same section in terms of ICF. +template <class ELFT> +template <class RelTy> +bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, + const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB) { + auto Eq = [&](const RelTy &RA, const RelTy &RB) { + // The two sections must be identical. + SymbolBody &SA = A->getFile()->getRelocTargetSym(RA); + SymbolBody &SB = B->getFile()->getRelocTargetSym(RB); + if (&SA == &SB) + return true; + + // Or, the two sections must be in the same equivalence class. + auto *DA = dyn_cast<DefinedRegular<ELFT>>(&SA); + auto *DB = dyn_cast<DefinedRegular<ELFT>>(&SB); + if (!DA || !DB) + return false; + if (DA->Value != DB->Value) + return false; + + auto *X = dyn_cast<InputSection<ELFT>>(DA->Section); + auto *Y = dyn_cast<InputSection<ELFT>>(DB->Section); + if (!X || !Y) + return false; + + // Ineligible sections are in the special equivalence class 0. + // They can never be the same in terms of the equivalence class. + if (X->Class[Current] == 0) + return false; + + return X->Class[Current] == Y->Class[Current]; + }; + + return std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); +} + +// Compare "moving" part of two InputSections, namely relocation targets. +template <class ELFT> +bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A, + const InputSection<ELFT> *B) { + if (A->AreRelocsRela) + return variableEq(A, A->relas(), B, B->relas()); + return variableEq(A, A->rels(), B, B->rels()); +} + +template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) { + uint32_t Class = Sections[Begin]->Class[Current]; + for (size_t I = Begin + 1; I < End; ++I) + if (Class != Sections[I]->Class[Current]) + return I; + return End; +} + +// Sections in the same equivalence class are contiguous in Sections +// vector. Therefore, Sections vector can be considered as contiguous +// groups of sections, grouped by the class. 
+// +// This function calls Fn on every group that starts within [Begin, End). +// Note that a group must starts in that range but doesn't necessarily +// have to end before End. +template <class ELFT> +void ICF<ELFT>::forEachClassRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn) { + if (Begin > 0) + Begin = findBoundary(Begin - 1, End); + + while (Begin < End) { + size_t Mid = findBoundary(Begin, Sections.size()); + Fn(Begin, Mid); + Begin = Mid; + } +} + +// Call Fn on each equivalence class. +template <class ELFT> +void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) { + // If threading is disabled or the number of sections are + // too small to use threading, call Fn sequentially. + if (!Config->Threads || Sections.size() < 1024) { + forEachClassRange(0, Sections.size(), Fn); + ++Cnt; + return; + } + + Current = Cnt % 2; + Next = (Cnt + 1) % 2; + + // Split sections into 256 shards and call Fn in parallel. + size_t NumShards = 256; + size_t Step = Sections.size() / NumShards; + forLoop(0, NumShards, + [&](size_t I) { forEachClassRange(I * Step, (I + 1) * Step, Fn); }); + forEachClassRange(Step * NumShards, Sections.size(), Fn); + ++Cnt; +} + +// The main function of ICF. +template <class ELFT> void ICF<ELFT>::run() { + // Collect sections to merge. + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) + if (auto *S = dyn_cast<InputSection<ELFT>>(Sec)) + if (isEligible(S)) + Sections.push_back(S); + + // Initially, we use hash values to partition sections. + for (InputSection<ELFT> *S : Sections) + // Set MSB to 1 to avoid collisions with non-hash IDs. + S->Class[0] = getHash(S) | (1 << 31); + + // From now on, sections in Sections vector are ordered so that sections + // in the same equivalence class are consecutive in the vector. 
+ std::stable_sort(Sections.begin(), Sections.end(), + [](InputSection<ELFT> *A, InputSection<ELFT> *B) { + return A->Class[0] < B->Class[0]; + }); + + // Compare static contents and assign unique IDs for each static content. + forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); + + // Split groups by comparing relocations until convergence is obtained. + do { + Repeat = false; + forEachClass( + [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); + } while (Repeat); + + log("ICF needed " + Twine(Cnt) + " iterations"); + + // Merge sections by the equivalence class. + forEachClass([&](size_t Begin, size_t End) { + if (End - Begin == 1) + return; + + log("selected " + Sections[Begin]->Name); + for (size_t I = Begin + 1; I < End; ++I) { + log(" removed " + Sections[I]->Name); + Sections[Begin]->replace(Sections[I]); + } + }); +} + +// ICF entry point function. +template <class ELFT> void elf::doIcf() { ICF<ELFT>().run(); } + +template void elf::doIcf<ELF32LE>(); +template void elf::doIcf<ELF32BE>(); +template void elf::doIcf<ELF64LE>(); +template void elf::doIcf<ELF64BE>(); diff --git a/contrib/llvm/tools/lld/ELF/ICF.h b/contrib/llvm/tools/lld/ELF/ICF.h new file mode 100644 index 000000000000..502e128c8109 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/ICF.h @@ -0,0 +1,19 @@ +//===- ICF.h --------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ICF_H +#define LLD_ELF_ICF_H + +namespace lld { +namespace elf { +template <class ELFT> void doIcf(); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.cpp b/contrib/llvm/tools/lld/ELF/InputFiles.cpp new file mode 100644 index 000000000000..1fddf40f5b22 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/InputFiles.cpp @@ -0,0 +1,986 @@ +//===- InputFiles.cpp -----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" +#include "Error.h" +#include "InputSection.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/LTO/LTO.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/TarWriter.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::sys::fs; + +using namespace lld; +using namespace lld::elf; + +TarWriter *elf::Tar; + +namespace { +// In ELF object file all section addresses are zero. If we have multiple +// .text sections (when using -ffunction-section or comdat group) then +// LLVM DWARF parser will not be able to parse .debug_line correctly, unless +// we assign each section some unique address. This callback method assigns +// each section an address equal to its offset in ELF object file. 
+class ObjectInfo : public LoadedObjectInfo { +public: + uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const override { + return static_cast<const ELFSectionRef &>(Sec).getOffset(); + } + std::unique_ptr<LoadedObjectInfo> clone() const override { + return std::unique_ptr<LoadedObjectInfo>(); + } +}; +} + +Optional<MemoryBufferRef> elf::readFile(StringRef Path) { + if (Config->Verbose) + outs() << Path << "\n"; + + auto MBOrErr = MemoryBuffer::getFile(Path); + if (auto EC = MBOrErr.getError()) { + error(EC, "cannot open " + Path); + return None; + } + std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; + MemoryBufferRef MBRef = MB->getMemBufferRef(); + make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership + + if (Tar) + Tar->append(relativeToRoot(Path), MBRef.getBuffer()); + return MBRef; +} + +template <class ELFT> void elf::ObjectFile<ELFT>::initializeDwarfLine() { + std::unique_ptr<object::ObjectFile> Obj = + check(object::ObjectFile::createObjectFile(this->MB), + "createObjectFile failed"); + + ObjectInfo ObjInfo; + DWARFContextInMemory Dwarf(*Obj, &ObjInfo); + DwarfLine.reset(new DWARFDebugLine(&Dwarf.getLineSection().Relocs)); + DataExtractor LineData(Dwarf.getLineSection().Data, + ELFT::TargetEndianness == support::little, + ELFT::Is64Bits ? 8 : 4); + + // The second parameter is offset in .debug_line section + // for compilation unit (CU) of interest. We have only one + // CU (object file), so offset is always 0. + DwarfLine->getOrParseLineTable(LineData, 0); +} + +// Returns source line information for a given offset +// using DWARF debug info. +template <class ELFT> +std::string elf::ObjectFile<ELFT>::getLineInfo(InputSectionBase<ELFT> *S, + uintX_t Offset) { + if (!DwarfLine) + initializeDwarfLine(); + + // The offset to CU is 0. + const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0); + if (!Tbl) + return ""; + + // Use fake address calcuated by adding section file offset and offset in + // section. 
See comments for ObjectInfo class. + DILineInfo Info; + Tbl->getFileLineInfoForAddress( + S->Offset + Offset, nullptr, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info); + if (Info.Line == 0) + return ""; + return Info.FileName + ":" + std::to_string(Info.Line); +} + +// Returns "(internal)", "foo.a(bar.o)" or "baz.o". +std::string lld::toString(const InputFile *F) { + if (!F) + return "(internal)"; + if (!F->ArchiveName.empty()) + return (F->ArchiveName + "(" + F->getName() + ")").str(); + return F->getName(); +} + +template <class ELFT> static ELFKind getELFKind() { + if (ELFT::TargetEndianness == support::little) + return ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind; + return ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind; +} + +template <class ELFT> +ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) { + EKind = getELFKind<ELFT>(); + EMachine = getObj().getHeader()->e_machine; + OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; +} + +template <class ELFT> +typename ELFT::SymRange ELFFileBase<ELFT>::getGlobalSymbols() { + return makeArrayRef(Symbols.begin() + FirstNonLocal, Symbols.end()); +} + +template <class ELFT> +uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { + return check(getObj().getSectionIndex(&Sym, Symbols, SymtabSHNDX)); +} + +template <class ELFT> +void ELFFileBase<ELFT>::initSymtab(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr *Symtab) { + FirstNonLocal = Symtab->sh_info; + Symbols = check(getObj().symbols(Symtab)); + if (FirstNonLocal == 0 || FirstNonLocal > Symbols.size()) + fatal(toString(this) + ": invalid sh_info in symbol table"); + + StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections)); +} + +template <class ELFT> +elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M) + : ELFFileBase<ELFT>(Base::ObjectKind, M) {} + +template <class ELFT> +ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getNonLocalSymbols() { + return 
makeArrayRef(this->SymbolBodies).slice(this->FirstNonLocal); +} + +template <class ELFT> +ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getLocalSymbols() { + if (this->SymbolBodies.empty()) + return this->SymbolBodies; + return makeArrayRef(this->SymbolBodies).slice(1, this->FirstNonLocal - 1); +} + +template <class ELFT> +ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getSymbols() { + if (this->SymbolBodies.empty()) + return this->SymbolBodies; + return makeArrayRef(this->SymbolBodies).slice(1); +} + +template <class ELFT> +void elf::ObjectFile<ELFT>::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { + // Read section and symbol tables. + initializeSections(ComdatGroups); + initializeSymbols(); +} + +// Sections with SHT_GROUP and comdat bits define comdat section groups. +// They are identified and deduplicated by group name. This function +// returns a group name. +template <class ELFT> +StringRef +elf::ObjectFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr &Sec) { + if (this->Symbols.empty()) + this->initSymtab(Sections, + check(object::getSection<ELFT>(Sections, Sec.sh_link))); + const Elf_Sym *Sym = + check(object::getSymbol<ELFT>(this->Symbols, Sec.sh_info)); + return check(Sym->getName(this->StringTable)); +} + +template <class ELFT> +ArrayRef<typename elf::ObjectFile<ELFT>::Elf_Word> +elf::ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) { + const ELFFile<ELFT> &Obj = this->getObj(); + ArrayRef<Elf_Word> Entries = + check(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec)); + if (Entries.empty() || Entries[0] != GRP_COMDAT) + fatal(toString(this) + ": unsupported SHT_GROUP format"); + return Entries.slice(1); +} + +template <class ELFT> +bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { + // We don't merge sections if -O0 (default is -O1). This makes sometimes + // the linker significantly faster, although the output will be bigger. 
+ if (Config->Optimize == 0) + return false; + + // Do not merge sections if generating a relocatable object. It makes + // the code simpler because we do not need to update relocation addends + // to reflect changes introduced by merging. Instead of that we write + // such "merge" sections into separate OutputSections and keep SHF_MERGE + // / SHF_STRINGS flags and sh_entsize value to be able to perform merging + // later during a final linking. + if (Config->Relocatable) + return false; + + // A mergeable section with size 0 is useless because they don't have + // any data to merge. A mergeable string section with size 0 can be + // argued as invalid because it doesn't end with a null character. + // We'll avoid a mess by handling them as if they were non-mergeable. + if (Sec.sh_size == 0) + return false; + + // Check for sh_entsize. The ELF spec is not clear about the zero + // sh_entsize. It says that "the member [sh_entsize] contains 0 if + // the section does not hold a table of fixed-size entries". We know + // that Rust 1.13 produces a string mergeable section with a zero + // sh_entsize. Here we just accept it rather than being picky about it. + uintX_t EntSize = Sec.sh_entsize; + if (EntSize == 0) + return false; + if (Sec.sh_size % EntSize) + fatal(toString(this) + + ": SHF_MERGE section size must be a multiple of sh_entsize"); + + uintX_t Flags = Sec.sh_flags; + if (!(Flags & SHF_MERGE)) + return false; + if (Flags & SHF_WRITE) + fatal(toString(this) + ": writable SHF_MERGE section is not supported"); + + // Don't try to merge if the alignment is larger than the sh_entsize and this + // is not SHF_STRINGS. + // + // Since this is not a SHF_STRINGS, we would need to pad after every entity. + // It would be equivalent for the producer of the .o to just set a larger + // sh_entsize. 
+ if (Flags & SHF_STRINGS) + return true; + + return Sec.sh_addralign <= EntSize; +} + +template <class ELFT> +void elf::ObjectFile<ELFT>::initializeSections( + DenseSet<CachedHashStringRef> &ComdatGroups) { + ArrayRef<Elf_Shdr> ObjSections = check(this->getObj().sections()); + const ELFFile<ELFT> &Obj = this->getObj(); + uint64_t Size = ObjSections.size(); + Sections.resize(Size); + unsigned I = -1; + StringRef SectionStringTable = check(Obj.getSectionStringTable(ObjSections)); + for (const Elf_Shdr &Sec : ObjSections) { + ++I; + if (Sections[I] == &InputSection<ELFT>::Discarded) + continue; + + // SHF_EXCLUDE'ed sections are discarded by the linker. However, + // if -r is given, we'll let the final link discard such sections. + // This is compatible with GNU. + if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) { + Sections[I] = &InputSection<ELFT>::Discarded; + continue; + } + + switch (Sec.sh_type) { + case SHT_GROUP: + Sections[I] = &InputSection<ELFT>::Discarded; + if (ComdatGroups.insert(CachedHashStringRef( + getShtGroupSignature(ObjSections, Sec))) + .second) + continue; + for (uint32_t SecIndex : getShtGroupEntries(Sec)) { + if (SecIndex >= Size) + fatal(toString(this) + ": invalid section index in group: " + + Twine(SecIndex)); + Sections[SecIndex] = &InputSection<ELFT>::Discarded; + } + break; + case SHT_SYMTAB: + this->initSymtab(ObjSections, &Sec); + break; + case SHT_SYMTAB_SHNDX: + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, ObjSections)); + break; + case SHT_STRTAB: + case SHT_NULL: + break; + default: + Sections[I] = createInputSection(Sec, SectionStringTable); + } + + // .ARM.exidx sections have a reverse dependency on the InputSection they + // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. 
+ if (Sec.sh_flags & SHF_LINK_ORDER) { + if (Sec.sh_link >= Sections.size()) + fatal(toString(this) + ": invalid sh_link index: " + + Twine(Sec.sh_link)); + auto *IS = cast<InputSection<ELFT>>(Sections[Sec.sh_link]); + IS->DependentSection = Sections[I]; + } + } +} + +template <class ELFT> +InputSectionBase<ELFT> * +elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { + uint32_t Idx = Sec.sh_info; + if (Idx >= Sections.size()) + fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx)); + InputSectionBase<ELFT> *Target = Sections[Idx]; + + // Strictly speaking, a relocation section must be included in the + // group of the section it relocates. However, LLVM 3.3 and earlier + // would fail to do so, so we gracefully handle that case. + if (Target == &InputSection<ELFT>::Discarded) + return nullptr; + + if (!Target) + fatal(toString(this) + ": unsupported relocation reference"); + return Target; +} + +template <class ELFT> +InputSectionBase<ELFT> * +elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec, + StringRef SectionStringTable) { + StringRef Name = + check(this->getObj().getSectionName(&Sec, SectionStringTable)); + + switch (Sec.sh_type) { + case SHT_ARM_ATTRIBUTES: + // FIXME: ARM meta-data section. Retain the first attribute section + // we see. The eglibc ARM dynamic loaders require the presence of an + // attribute section for dlopen to work. + // In a full implementation we would merge all attribute sections. + if (In<ELFT>::ARMAttributes == nullptr) { + In<ELFT>::ARMAttributes = make<InputSection<ELFT>>(this, &Sec, Name); + return In<ELFT>::ARMAttributes; + } + return &InputSection<ELFT>::Discarded; + case SHT_RELA: + case SHT_REL: { + // This section contains relocation information. + // If -r is given, we do not interpret or apply relocation + // but just copy relocation sections to output. 
+ if (Config->Relocatable) + return make<InputSection<ELFT>>(this, &Sec, Name); + + // Find the relocation target section and associate this + // section with it. + InputSectionBase<ELFT> *Target = getRelocTarget(Sec); + if (!Target) + return nullptr; + if (Target->FirstRelocation) + fatal(toString(this) + + ": multiple relocation sections to one section are not supported"); + if (!isa<InputSection<ELFT>>(Target) && !isa<EhInputSection<ELFT>>(Target)) + fatal(toString(this) + + ": relocations pointing to SHF_MERGE are not supported"); + + size_t NumRelocations; + if (Sec.sh_type == SHT_RELA) { + ArrayRef<Elf_Rela> Rels = check(this->getObj().relas(&Sec)); + Target->FirstRelocation = Rels.begin(); + NumRelocations = Rels.size(); + Target->AreRelocsRela = true; + } else { + ArrayRef<Elf_Rel> Rels = check(this->getObj().rels(&Sec)); + Target->FirstRelocation = Rels.begin(); + NumRelocations = Rels.size(); + Target->AreRelocsRela = false; + } + assert(isUInt<31>(NumRelocations)); + Target->NumRelocations = NumRelocations; + return nullptr; + } + } + + // .note.GNU-stack is a marker section to control the presence of + // PT_GNU_STACK segment in outputs. Since the presence of the segment + // is controlled only by the command line option (-z execstack) in LLD, + // .note.GNU-stack is ignored. + if (Name == ".note.GNU-stack") + return &InputSection<ELFT>::Discarded; + + if (Name == ".note.GNU-split-stack") { + error("objects using splitstacks are not supported"); + return &InputSection<ELFT>::Discarded; + } + + if (Config->Strip != StripPolicy::None && Name.startswith(".debug")) + return &InputSection<ELFT>::Discarded; + + // The linkonce feature is a sort of proto-comdat. Some glibc i386 object + // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce + // sections. Drop those sections to avoid duplicate symbol errors. + // FIXME: This is glibc PR20543, we should remove this hack once that has been + // fixed for a while. 
+ if (Name.startswith(".gnu.linkonce.")) + return &InputSection<ELFT>::Discarded; + + // The linker merges EH (exception handling) frames and creates a + // .eh_frame_hdr section for runtime. So we handle them with a special + // class. For relocatable outputs, they are just passed through. + if (Name == ".eh_frame" && !Config->Relocatable) + return make<EhInputSection<ELFT>>(this, &Sec, Name); + + if (shouldMerge(Sec)) + return make<MergeInputSection<ELFT>>(this, &Sec, Name); + return make<InputSection<ELFT>>(this, &Sec, Name); +} + +template <class ELFT> void elf::ObjectFile<ELFT>::initializeSymbols() { + SymbolBodies.reserve(this->Symbols.size()); + for (const Elf_Sym &Sym : this->Symbols) + SymbolBodies.push_back(createSymbolBody(&Sym)); +} + +template <class ELFT> +InputSectionBase<ELFT> * +elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { + uint32_t Index = this->getSectionIndex(Sym); + if (Index >= Sections.size()) + fatal(toString(this) + ": invalid section index: " + Twine(Index)); + InputSectionBase<ELFT> *S = Sections[Index]; + + // We found that GNU assembler 2.17.50 [FreeBSD] 2007-07-03 could + // generate broken objects. STT_SECTION/STT_NOTYPE symbols can be + // associated with SHT_REL[A]/SHT_SYMTAB/SHT_STRTAB sections. + // In this case it is fine for section to be null here as we do not + // allocate sections of these types. 
+ if (!S) { + if (Index == 0 || Sym.getType() == STT_SECTION || + Sym.getType() == STT_NOTYPE) + return nullptr; + fatal(toString(this) + ": invalid section index: " + Twine(Index)); + } + + if (S == &InputSection<ELFT>::Discarded) + return S; + return S->Repl; +} + +template <class ELFT> +SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { + int Binding = Sym->getBinding(); + InputSectionBase<ELFT> *Sec = getSection(*Sym); + + uint8_t StOther = Sym->st_other; + uint8_t Type = Sym->getType(); + uintX_t Value = Sym->st_value; + uintX_t Size = Sym->st_size; + + if (Binding == STB_LOCAL) { + if (Sym->getType() == STT_FILE) + SourceFile = check(Sym->getName(this->StringTable)); + + if (this->StringTable.size() <= Sym->st_name) + fatal(toString(this) + ": invalid symbol name offset"); + + StringRefZ Name = this->StringTable.data() + Sym->st_name; + if (Sym->st_shndx == SHN_UNDEF) + return new (BAlloc) + Undefined<ELFT>(Name, /*IsLocal=*/true, StOther, Type, this); + + return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther, + Type, Value, Size, Sec, this); + } + + StringRef Name = check(Sym->getName(this->StringTable)); + + switch (Sym->st_shndx) { + case SHN_UNDEF: + return elf::Symtab<ELFT>::X + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, + /*CanOmitFromDynSym=*/false, this) + ->body(); + case SHN_COMMON: + if (Value == 0 || Value >= UINT32_MAX) + fatal(toString(this) + ": common symbol '" + Name + + "' has invalid alignment: " + Twine(Value)); + return elf::Symtab<ELFT>::X + ->addCommon(Name, Size, Value, Binding, StOther, Type, this) + ->body(); + } + + switch (Binding) { + default: + fatal(toString(this) + ": unexpected binding: " + Twine(Binding)); + case STB_GLOBAL: + case STB_WEAK: + case STB_GNU_UNIQUE: + if (Sec == &InputSection<ELFT>::Discarded) + return elf::Symtab<ELFT>::X + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, + /*CanOmitFromDynSym=*/false, this) + ->body(); + return 
elf::Symtab<ELFT>::X + ->addRegular(Name, StOther, Type, Value, Size, Binding, Sec, this) + ->body(); + } +} + +template <class ELFT> void ArchiveFile::parse() { + File = check(Archive::create(MB), + MB.getBufferIdentifier() + ": failed to parse archive"); + + // Read the symbol table to construct Lazy objects. + for (const Archive::Symbol &Sym : File->symbols()) + Symtab<ELFT>::X->addLazyArchive(this, Sym); +} + +// Returns a buffer pointing to a member file containing a given symbol. +std::pair<MemoryBufferRef, uint64_t> +ArchiveFile::getMember(const Archive::Symbol *Sym) { + Archive::Child C = + check(Sym->getMember(), + "could not get the member for symbol " + Sym->getName()); + + if (!Seen.insert(C.getChildOffset()).second) + return {MemoryBufferRef(), 0}; + + MemoryBufferRef Ret = + check(C.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + + Sym->getName()); + + if (C.getParent()->isThin() && Tar) + Tar->append(relativeToRoot(check(C.getFullName())), Ret.getBuffer()); + if (C.getParent()->isThin()) + return {Ret, 0}; + return {Ret, C.getChildOffset()}; +} + +template <class ELFT> +SharedFile<ELFT>::SharedFile(MemoryBufferRef M) + : ELFFileBase<ELFT>(Base::SharedKind, M), AsNeeded(Config->AsNeeded) {} + +template <class ELFT> +const typename ELFT::Shdr * +SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const { + return check( + this->getObj().getSection(&Sym, this->Symbols, this->SymtabSHNDX)); +} + +// Partially parse the shared object file so that we can call +// getSoName on this object. 
+template <class ELFT> void SharedFile<ELFT>::parseSoName() { + const Elf_Shdr *DynamicSec = nullptr; + + const ELFFile<ELFT> Obj = this->getObj(); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + for (const Elf_Shdr &Sec : Sections) { + switch (Sec.sh_type) { + default: + continue; + case SHT_DYNSYM: + this->initSymtab(Sections, &Sec); + break; + case SHT_DYNAMIC: + DynamicSec = &Sec; + break; + case SHT_SYMTAB_SHNDX: + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, Sections)); + break; + case SHT_GNU_versym: + this->VersymSec = &Sec; + break; + case SHT_GNU_verdef: + this->VerdefSec = &Sec; + break; + } + } + + if (this->VersymSec && this->Symbols.empty()) + error("SHT_GNU_versym should be associated with symbol table"); + + // DSOs are identified by soname, and they usually contain + // DT_SONAME tag in their header. But if they are missing, + // filenames are used as default sonames. + SoName = sys::path::filename(this->getName()); + + if (!DynamicSec) + return; + + ArrayRef<Elf_Dyn> Arr = + check(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec), + toString(this) + ": getSectionContentsAsArray failed"); + for (const Elf_Dyn &Dyn : Arr) { + if (Dyn.d_tag == DT_SONAME) { + uintX_t Val = Dyn.getVal(); + if (Val >= this->StringTable.size()) + fatal(toString(this) + ": invalid DT_SONAME entry"); + SoName = StringRef(this->StringTable.data() + Val); + return; + } + } +} + +// Parse the version definitions in the object file if present. Returns a vector +// whose nth element contains a pointer to the Elf_Verdef for version identifier +// n. Version identifiers that are not definitions map to nullptr. The array +// always has at least length 1. 
+template <class ELFT> +std::vector<const typename ELFT::Verdef *> +SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) { + std::vector<const Elf_Verdef *> Verdefs(1); + // We only need to process symbol versions for this DSO if it has both a + // versym and a verdef section, which indicates that the DSO contains symbol + // version definitions. + if (!VersymSec || !VerdefSec) + return Verdefs; + + // The location of the first global versym entry. + const char *Base = this->MB.getBuffer().data(); + Versym = reinterpret_cast<const Elf_Versym *>(Base + VersymSec->sh_offset) + + this->FirstNonLocal; + + // We cannot determine the largest verdef identifier without inspecting + // every Elf_Verdef, but both bfd and gold assign verdef identifiers + // sequentially starting from 1, so we predict that the largest identifier + // will be VerdefCount. + unsigned VerdefCount = VerdefSec->sh_info; + Verdefs.resize(VerdefCount + 1); + + // Build the Verdefs array by following the chain of Elf_Verdef objects + // from the start of the .gnu.version_d section. + const char *Verdef = Base + VerdefSec->sh_offset; + for (unsigned I = 0; I != VerdefCount; ++I) { + auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef); + Verdef += CurVerdef->vd_next; + unsigned VerdefIndex = CurVerdef->vd_ndx; + if (Verdefs.size() <= VerdefIndex) + Verdefs.resize(VerdefIndex + 1); + Verdefs[VerdefIndex] = CurVerdef; + } + + return Verdefs; +} + +// Fully parse the shared object file. This must be called after parseSoName(). +template <class ELFT> void SharedFile<ELFT>::parseRest() { + // Create mapping from version identifiers to Elf_Verdef entries. 
+ const Elf_Versym *Versym = nullptr; + std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym); + + Elf_Sym_Range Syms = this->getGlobalSymbols(); + for (const Elf_Sym &Sym : Syms) { + unsigned VersymIndex = 0; + if (Versym) { + VersymIndex = Versym->vs_index; + ++Versym; + } + bool Hidden = VersymIndex & VERSYM_HIDDEN; + VersymIndex = VersymIndex & ~VERSYM_HIDDEN; + + StringRef Name = check(Sym.getName(this->StringTable)); + if (Sym.isUndefined()) { + Undefs.push_back(Name); + continue; + } + + // Ignore local symbols. + if (Versym && VersymIndex == VER_NDX_LOCAL) + continue; + + const Elf_Verdef *V = + VersymIndex == VER_NDX_GLOBAL ? nullptr : Verdefs[VersymIndex]; + + if (!Hidden) + elf::Symtab<ELFT>::X->addShared(this, Name, Sym, V); + + // Also add the symbol with the versioned name to handle undefined symbols + // with explicit versions. + if (V) { + StringRef VerName = this->StringTable.data() + V->getAux()->vda_name; + Name = Saver.save(Twine(Name) + "@" + VerName); + elf::Symtab<ELFT>::X->addShared(this, Name, Sym, V); + } + } +} + +static ELFKind getBitcodeELFKind(MemoryBufferRef MB) { + Triple T(check(getBitcodeTargetTriple(MB))); + if (T.isLittleEndian()) + return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind; + return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind; +} + +static uint8_t getBitcodeMachineKind(MemoryBufferRef MB) { + Triple T(check(getBitcodeTargetTriple(MB))); + switch (T.getArch()) { + case Triple::aarch64: + return EM_AARCH64; + case Triple::arm: + return EM_ARM; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: + case Triple::mips64el: + return EM_MIPS; + case Triple::ppc: + return EM_PPC; + case Triple::ppc64: + return EM_PPC64; + case Triple::x86: + return T.isOSIAMCU() ? 
EM_IAMCU : EM_386; + case Triple::x86_64: + return EM_X86_64; + default: + fatal(MB.getBufferIdentifier() + + ": could not infer e_machine from bitcode target triple " + T.str()); + } +} + +BitcodeFile::BitcodeFile(MemoryBufferRef MB) : InputFile(BitcodeKind, MB) { + EKind = getBitcodeELFKind(MB); + EMachine = getBitcodeMachineKind(MB); +} + +static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { + switch (GvVisibility) { + case GlobalValue::DefaultVisibility: + return STV_DEFAULT; + case GlobalValue::HiddenVisibility: + return STV_HIDDEN; + case GlobalValue::ProtectedVisibility: + return STV_PROTECTED; + } + llvm_unreachable("unknown visibility"); +} + +template <class ELFT> +static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, + const lto::InputFile::Symbol &ObjSym, + BitcodeFile *F) { + StringRef NameRef = Saver.save(ObjSym.getName()); + uint32_t Flags = ObjSym.getFlags(); + uint32_t Binding = (Flags & BasicSymbolRef::SF_Weak) ? STB_WEAK : STB_GLOBAL; + + uint8_t Type = ObjSym.isTLS() ? 
STT_TLS : STT_NOTYPE; + uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); + bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); + + int C = check(ObjSym.getComdatIndex()); + if (C != -1 && !KeptComdats[C]) + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); + + if (Flags & BasicSymbolRef::SF_Undefined) + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); + + if (Flags & BasicSymbolRef::SF_Common) + return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(), + ObjSym.getCommonAlignment(), Binding, + Visibility, STT_OBJECT, F); + + return Symtab<ELFT>::X->addBitcode(NameRef, Binding, Visibility, Type, + CanOmitFromDynSym, F); +} + +template <class ELFT> +void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { + + // Here we pass a new MemoryBufferRef which is identified by ArchiveName + // (the fully resolved path of the archive) + member name + offset of the + // member in the archive. + // ThinLTO uses the MemoryBufferRef identifier to access its internal + // data structures and if two archives define two members with the same name, + // this causes a collision which result in only one of the objects being + // taken into consideration at LTO time (which very likely causes undefined + // symbols later in the link stage). 
+ Obj = check(lto::InputFile::create(MemoryBufferRef( + MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier() + + utostr(OffsetInArchive))))); + + std::vector<bool> KeptComdats; + for (StringRef S : Obj->getComdatTable()) { + StringRef N = Saver.save(S); + KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(N)).second); + } + + for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) + Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this)); +} + +template <template <class> class T> +static InputFile *createELFFile(MemoryBufferRef MB) { + unsigned char Size; + unsigned char Endian; + std::tie(Size, Endian) = getElfArchType(MB.getBuffer()); + if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB) + fatal(MB.getBufferIdentifier() + ": invalid data encoding"); + + size_t BufSize = MB.getBuffer().size(); + if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) || + (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr))) + fatal(MB.getBufferIdentifier() + ": file is too short"); + + InputFile *Obj; + if (Size == ELFCLASS32 && Endian == ELFDATA2LSB) + Obj = make<T<ELF32LE>>(MB); + else if (Size == ELFCLASS32 && Endian == ELFDATA2MSB) + Obj = make<T<ELF32BE>>(MB); + else if (Size == ELFCLASS64 && Endian == ELFDATA2LSB) + Obj = make<T<ELF64LE>>(MB); + else if (Size == ELFCLASS64 && Endian == ELFDATA2MSB) + Obj = make<T<ELF64BE>>(MB); + else + fatal(MB.getBufferIdentifier() + ": invalid file class"); + + if (!Config->FirstElf) + Config->FirstElf = Obj; + return Obj; +} + +template <class ELFT> void BinaryFile::parse() { + StringRef Buf = MB.getBuffer(); + ArrayRef<uint8_t> Data = + makeArrayRef<uint8_t>((const uint8_t *)Buf.data(), Buf.size()); + + std::string Filename = MB.getBufferIdentifier(); + std::transform(Filename.begin(), Filename.end(), Filename.begin(), + [](char C) { return isalnum(C) ? 
C : '_'; }); + Filename = "_binary_" + Filename; + StringRef StartName = Saver.save(Twine(Filename) + "_start"); + StringRef EndName = Saver.save(Twine(Filename) + "_end"); + StringRef SizeName = Saver.save(Twine(Filename) + "_size"); + + auto *Section = + make<InputSection<ELFT>>(SHF_ALLOC, SHT_PROGBITS, 8, Data, ".data"); + Sections.push_back(Section); + + elf::Symtab<ELFT>::X->addRegular(StartName, STV_DEFAULT, STT_OBJECT, 0, 0, + STB_GLOBAL, Section, nullptr); + elf::Symtab<ELFT>::X->addRegular(EndName, STV_DEFAULT, STT_OBJECT, + Data.size(), 0, STB_GLOBAL, Section, + nullptr); + elf::Symtab<ELFT>::X->addRegular(SizeName, STV_DEFAULT, STT_OBJECT, + Data.size(), 0, STB_GLOBAL, nullptr, + nullptr); +} + +static bool isBitcode(MemoryBufferRef MB) { + using namespace sys::fs; + return identify_magic(MB.getBuffer()) == file_magic::bitcode; +} + +InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName, + uint64_t OffsetInArchive) { + InputFile *F = + isBitcode(MB) ? make<BitcodeFile>(MB) : createELFFile<ObjectFile>(MB); + F->ArchiveName = ArchiveName; + F->OffsetInArchive = OffsetInArchive; + return F; +} + +InputFile *elf::createSharedFile(MemoryBufferRef MB) { + return createELFFile<SharedFile>(MB); +} + +MemoryBufferRef LazyObjectFile::getBuffer() { + if (Seen) + return MemoryBufferRef(); + Seen = true; + return MB; +} + +template <class ELFT> void LazyObjectFile::parse() { + for (StringRef Sym : getSymbols()) + Symtab<ELFT>::X->addLazyObject(Sym, *this); +} + +template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() { + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::SymRange Elf_Sym_Range; + + const ELFFile<ELFT> Obj(this->MB.getBuffer()); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + for (const Elf_Shdr &Sec : Sections) { + if (Sec.sh_type != SHT_SYMTAB) + continue; + Elf_Sym_Range Syms = check(Obj.symbols(&Sec)); + uint32_t FirstNonLocal = Sec.sh_info; + 
StringRef StringTable = check(Obj.getStringTableForSymtab(Sec, Sections)); + std::vector<StringRef> V; + for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal)) + if (Sym.st_shndx != SHN_UNDEF) + V.push_back(check(Sym.getName(StringTable))); + return V; + } + return {}; +} + +std::vector<StringRef> LazyObjectFile::getBitcodeSymbols() { + std::unique_ptr<lto::InputFile> Obj = check(lto::InputFile::create(this->MB)); + std::vector<StringRef> V; + for (const lto::InputFile::Symbol &Sym : Obj->symbols()) + if (!(Sym.getFlags() & BasicSymbolRef::SF_Undefined)) + V.push_back(Saver.save(Sym.getName())); + return V; +} + +// Returns a vector of globally-visible defined symbol names. +std::vector<StringRef> LazyObjectFile::getSymbols() { + if (isBitcode(this->MB)) + return getBitcodeSymbols(); + + unsigned char Size; + unsigned char Endian; + std::tie(Size, Endian) = getElfArchType(this->MB.getBuffer()); + if (Size == ELFCLASS32) { + if (Endian == ELFDATA2LSB) + return getElfSymbols<ELF32LE>(); + return getElfSymbols<ELF32BE>(); + } + if (Endian == ELFDATA2LSB) + return getElfSymbols<ELF64LE>(); + return getElfSymbols<ELF64BE>(); +} + +template void ArchiveFile::parse<ELF32LE>(); +template void ArchiveFile::parse<ELF32BE>(); +template void ArchiveFile::parse<ELF64LE>(); +template void ArchiveFile::parse<ELF64BE>(); + +template void BitcodeFile::parse<ELF32LE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF32BE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF64LE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF64BE>(DenseSet<CachedHashStringRef> &); + +template void LazyObjectFile::parse<ELF32LE>(); +template void LazyObjectFile::parse<ELF32BE>(); +template void LazyObjectFile::parse<ELF64LE>(); +template void LazyObjectFile::parse<ELF64BE>(); + +template class elf::ELFFileBase<ELF32LE>; +template class elf::ELFFileBase<ELF32BE>; +template class elf::ELFFileBase<ELF64LE>; +template class 
elf::ELFFileBase<ELF64BE>; + +template class elf::ObjectFile<ELF32LE>; +template class elf::ObjectFile<ELF32BE>; +template class elf::ObjectFile<ELF64LE>; +template class elf::ObjectFile<ELF64BE>; + +template class elf::SharedFile<ELF32LE>; +template class elf::SharedFile<ELF32BE>; +template class elf::SharedFile<ELF64LE>; +template class elf::SharedFile<ELF64BE>; + +template void BinaryFile::parse<ELF32LE>(); +template void BinaryFile::parse<ELF32BE>(); +template void BinaryFile::parse<ELF64LE>(); +template void BinaryFile::parse<ELF64BE>(); diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.h b/contrib/llvm/tools/lld/ELF/InputFiles.h new file mode 100644 index 000000000000..73dda7b566b8 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/InputFiles.h @@ -0,0 +1,342 @@ +//===- InputFiles.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_INPUT_FILES_H +#define LLD_ELF_INPUT_FILES_H + +#include "Config.h" +#include "InputSection.h" +#include "Error.h" +#include "Symbols.h" + +#include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Comdat.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/IRObjectFile.h" + +#include <map> + +namespace llvm { +class DWARFDebugLine; +class TarWriter; +namespace lto { +class InputFile; +} +} + +namespace lld { +namespace elf { +class InputFile; +} + +// Returns "(internal)", "foo.a(bar.o)" or "baz.o". +std::string toString(const elf::InputFile *F); + +namespace elf { + +using llvm::object::Archive; + +class Lazy; +class SymbolBody; + +// If -reproduce option is given, all input files are written +// to this tar archive. 
+extern llvm::TarWriter *Tar; + +// Opens a given file. +llvm::Optional<MemoryBufferRef> readFile(StringRef Path); + +// The root class of input files. +class InputFile { +public: + enum Kind { + ObjectKind, + SharedKind, + LazyObjectKind, + ArchiveKind, + BitcodeKind, + BinaryKind, + }; + + Kind kind() const { return FileKind; } + + StringRef getName() const { return MB.getBufferIdentifier(); } + MemoryBufferRef MB; + + // Filename of .a which contained this file. If this file was + // not in an archive file, it is the empty string. We use this + // string for creating error messages. + StringRef ArchiveName; + + // If this file is in an archive, the member contains the offset of + // the file in the archive. Otherwise, it's just zero. We store this + // field so that we can pass it to lib/LTO in order to disambiguate + // between objects. + uint64_t OffsetInArchive; + + // If this is an architecture-specific file, the following members + // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 
+ ELFKind EKind = ELFNoneKind; + uint16_t EMachine = llvm::ELF::EM_NONE; + uint8_t OSABI = 0; + +protected: + InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} + +private: + const Kind FileKind; +}; + +template <typename ELFT> class ELFFileBase : public InputFile { +public: + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::SymRange Elf_Sym_Range; + + ELFFileBase(Kind K, MemoryBufferRef M); + static bool classof(const InputFile *F) { + Kind K = F->kind(); + return K == ObjectKind || K == SharedKind; + } + + llvm::object::ELFFile<ELFT> getObj() const { + return llvm::object::ELFFile<ELFT>(MB.getBuffer()); + } + + StringRef getStringTable() const { return StringTable; } + + uint32_t getSectionIndex(const Elf_Sym &Sym) const; + + Elf_Sym_Range getGlobalSymbols(); + +protected: + ArrayRef<Elf_Sym> Symbols; + uint32_t FirstNonLocal = 0; + ArrayRef<Elf_Word> SymtabSHNDX; + StringRef StringTable; + void initSymtab(ArrayRef<Elf_Shdr> Sections, const Elf_Shdr *Symtab); +}; + +// .o file. 
+template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> { + typedef ELFFileBase<ELFT> Base; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::SymRange Elf_Sym_Range; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::uint uintX_t; + + StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr &Sec); + ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec); + +public: + static bool classof(const InputFile *F) { + return F->kind() == Base::ObjectKind; + } + + ArrayRef<SymbolBody *> getSymbols(); + ArrayRef<SymbolBody *> getLocalSymbols(); + ArrayRef<SymbolBody *> getNonLocalSymbols(); + + explicit ObjectFile(MemoryBufferRef M); + void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); + + ArrayRef<InputSectionBase<ELFT> *> getSections() const { return Sections; } + InputSectionBase<ELFT> *getSection(const Elf_Sym &Sym) const; + + SymbolBody &getSymbolBody(uint32_t SymbolIndex) const { + if (SymbolIndex >= SymbolBodies.size()) + fatal(toString(this) + ": invalid symbol index"); + return *SymbolBodies[SymbolIndex]; + } + + template <typename RelT> + SymbolBody &getRelocTargetSym(const RelT &Rel) const { + uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL); + return getSymbolBody(SymIndex); + } + + // Returns source line information for a given offset. + // If no information is available, returns "". + std::string getLineInfo(InputSectionBase<ELFT> *S, uintX_t Offset); + + // MIPS GP0 value defined by this file. This value represents the gp value + // used to create the relocatable object and required to support + // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. + uint32_t MipsGp0 = 0; + + // The number is the offset in the string table. It will be used as the + // st_name of the symbol. 
+ std::vector<std::pair<const DefinedRegular<ELFT> *, unsigned>> KeptLocalSyms; + + // Name of source file obtained from STT_FILE symbol value, + // or empty string if there is no such symbol in object file + // symbol table. + StringRef SourceFile; + +private: + void + initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); + void initializeSymbols(); + void initializeDwarfLine(); + InputSectionBase<ELFT> *getRelocTarget(const Elf_Shdr &Sec); + InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec, + StringRef SectionStringTable); + + bool shouldMerge(const Elf_Shdr &Sec); + SymbolBody *createSymbolBody(const Elf_Sym *Sym); + + // List of all sections defined by this file. + std::vector<InputSectionBase<ELFT> *> Sections; + + // List of all symbols referenced or defined by this file. + std::vector<SymbolBody *> SymbolBodies; + + // Debugging information to retrieve source file and line for error + // reporting. Linker may find reasonable number of errors in a + // single object file, so we cache debugging information in order to + // parse it only once for each object file we link. + std::unique_ptr<llvm::DWARFDebugLine> DwarfLine; +}; + +// LazyObjectFile is analogous to ArchiveFile in the sense that +// the file contains lazy symbols. The difference is that +// LazyObjectFile wraps a single file instead of multiple files. +// +// This class is used for --start-lib and --end-lib options which +// instruct the linker to link object files between them with the +// archive file semantics. 
+class LazyObjectFile : public InputFile { +public: + explicit LazyObjectFile(MemoryBufferRef M) : InputFile(LazyObjectKind, M) {} + + static bool classof(const InputFile *F) { + return F->kind() == LazyObjectKind; + } + + template <class ELFT> void parse(); + MemoryBufferRef getBuffer(); + +private: + std::vector<StringRef> getSymbols(); + template <class ELFT> std::vector<StringRef> getElfSymbols(); + std::vector<StringRef> getBitcodeSymbols(); + + bool Seen = false; +}; + +// An ArchiveFile object represents a .a file. +class ArchiveFile : public InputFile { +public: + explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } + template <class ELFT> void parse(); + + // Returns a memory buffer for a given symbol and the offset in the archive + // for the member. An empty memory buffer and an offset of zero + // is returned if we have already returned the same memory buffer. + // (So that we don't instantiate same members more than once.) + std::pair<MemoryBufferRef, uint64_t> getMember(const Archive::Symbol *Sym); + +private: + std::unique_ptr<Archive> File; + llvm::DenseSet<uint64_t> Seen; +}; + +class BitcodeFile : public InputFile { +public: + explicit BitcodeFile(MemoryBufferRef M); + static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } + template <class ELFT> + void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); + ArrayRef<Symbol *> getSymbols() { return Symbols; } + std::unique_ptr<llvm::lto::InputFile> Obj; + +private: + std::vector<Symbol *> Symbols; +}; + +// .so file. 
+template <class ELFT> class SharedFile : public ELFFileBase<ELFT> { + typedef ELFFileBase<ELFT> Base; + typedef typename ELFT::Dyn Elf_Dyn; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::SymRange Elf_Sym_Range; + typedef typename ELFT::Verdef Elf_Verdef; + typedef typename ELFT::Versym Elf_Versym; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::uint uintX_t; + + std::vector<StringRef> Undefs; + StringRef SoName; + const Elf_Shdr *VersymSec = nullptr; + const Elf_Shdr *VerdefSec = nullptr; + +public: + StringRef getSoName() const { return SoName; } + const Elf_Shdr *getSection(const Elf_Sym &Sym) const; + llvm::ArrayRef<StringRef> getUndefinedSymbols() { return Undefs; } + + static bool classof(const InputFile *F) { + return F->kind() == Base::SharedKind; + } + + explicit SharedFile(MemoryBufferRef M); + + void parseSoName(); + void parseRest(); + std::vector<const Elf_Verdef *> parseVerdefs(const Elf_Versym *&Versym); + + struct NeededVer { + // The string table offset of the version name in the output file. + size_t StrTab; + + // The version identifier for this version name. + uint16_t Index; + }; + + // Mapping from Elf_Verdef data structures to information about Elf_Vernaux + // data structures in the output file. 
+ std::map<const Elf_Verdef *, NeededVer> VerdefMap; + + // Used for --as-needed + bool AsNeeded = false; + bool IsUsed = false; + bool isNeeded() const { return !AsNeeded || IsUsed; } +}; + +class BinaryFile : public InputFile { +public: + explicit BinaryFile(MemoryBufferRef M) : InputFile(BinaryKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == BinaryKind; } + template <class ELFT> void parse(); + ArrayRef<InputSectionData *> getSections() const { return Sections; } + +private: + std::vector<InputSectionData *> Sections; +}; + +InputFile *createObjectFile(MemoryBufferRef MB, StringRef ArchiveName = "", + uint64_t OffsetInArchive = 0); +InputFile *createSharedFile(MemoryBufferRef MB); + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/InputSection.cpp b/contrib/llvm/tools/lld/ELF/InputSection.cpp new file mode 100644 index 000000000000..e87d92aa207c --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/InputSection.cpp @@ -0,0 +1,850 @@ +//===- InputSection.cpp ---------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InputSection.h" +#include "Config.h" +#include "EhFrame.h" +#include "Error.h" +#include "InputFiles.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Relocations.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "Thunks.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/Endian.h" +#include <mutex> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +// Returns a string to construct an error message. 
+template <class ELFT> +std::string lld::toString(const InputSectionBase<ELFT> *Sec) { + return (Sec->getFile()->getName() + ":(" + Sec->Name + ")").str(); +} + +template <class ELFT> +static ArrayRef<uint8_t> getSectionContents(elf::ObjectFile<ELFT> *File, + const typename ELFT::Shdr *Hdr) { + if (!File || Hdr->sh_type == SHT_NOBITS) + return makeArrayRef<uint8_t>(nullptr, Hdr->sh_size); + return check(File->getObj().getSectionContents(Hdr)); +} + +template <class ELFT> +InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, + uintX_t Flags, uint32_t Type, + uintX_t Entsize, uint32_t Link, + uint32_t Info, uintX_t Addralign, + ArrayRef<uint8_t> Data, StringRef Name, + Kind SectionKind) + : InputSectionData(SectionKind, Name, Data, + !Config->GcSections || !(Flags & SHF_ALLOC)), + File(File), Flags(Flags), Entsize(Entsize), Type(Type), Link(Link), + Info(Info), Repl(this) { + NumRelocations = 0; + AreRelocsRela = false; + + // The ELF spec states that a value of 0 means the section has + // no alignment constraits. + uint64_t V = std::max<uint64_t>(Addralign, 1); + if (!isPowerOf2_64(V)) + fatal(toString(File) + ": section sh_addralign is not a power of 2"); + + // We reject object files having insanely large alignments even though + // they are allowed by the spec. I think 4GB is a reasonable limitation. + // We might want to relax this in the future. + if (V > UINT32_MAX) + fatal(toString(File) + ": section sh_addralign is too large"); + Alignment = V; + + // If it is not a mergeable section, overwrite the flag so that the flag + // is consistent with the class. This inconsistency could occur when + // string merging is disabled using -O0 flag. 
+ if (!Config->Relocatable && !isa<MergeInputSection<ELFT>>(this)) + this->Flags &= ~(SHF_MERGE | SHF_STRINGS); +} + +template <class ELFT> +InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, + const Elf_Shdr *Hdr, StringRef Name, + Kind SectionKind) + : InputSectionBase(File, Hdr->sh_flags & ~SHF_INFO_LINK, Hdr->sh_type, + Hdr->sh_entsize, Hdr->sh_link, Hdr->sh_info, + Hdr->sh_addralign, getSectionContents(File, Hdr), Name, + SectionKind) { + this->Offset = Hdr->sh_offset; +} + +template <class ELFT> size_t InputSectionBase<ELFT>::getSize() const { + if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) + return S->getSize(); + + if (auto *D = dyn_cast<InputSection<ELFT>>(this)) + if (D->getThunksSize() > 0) + return D->getThunkOff() + D->getThunksSize(); + + return Data.size(); +} + +// Returns a string for an error message. +template <class SectionT> static std::string getName(SectionT *Sec) { + return (Sec->getFile()->getName() + ":(" + Sec->Name + ")").str(); +} + +template <class ELFT> +typename ELFT::uint InputSectionBase<ELFT>::getOffset(uintX_t Offset) const { + switch (kind()) { + case Regular: + return cast<InputSection<ELFT>>(this)->OutSecOff + Offset; + case Synthetic: + // For synthetic sections we treat offset -1 as the end of the section. + // The same approach is used for synthetic symbols (DefinedSynthetic). + return cast<InputSection<ELFT>>(this)->OutSecOff + + (Offset == uintX_t(-1) ? getSize() : Offset); + case EHFrame: + // The file crtbeginT.o has relocations pointing to the start of an empty + // .eh_frame that is known to be the first in the link. It does that to + // identify the start of the output .eh_frame. 
+ return Offset; + case Merge: + return cast<MergeInputSection<ELFT>>(this)->getOffset(Offset); + } + llvm_unreachable("invalid section kind"); +} + +template <class ELFT> bool InputSectionBase<ELFT>::isCompressed() const { + return (Flags & SHF_COMPRESSED) || Name.startswith(".zdebug"); +} + +// Returns compressed data and its size when uncompressed. +template <class ELFT> +std::pair<ArrayRef<uint8_t>, uint64_t> +InputSectionBase<ELFT>::getElfCompressedData(ArrayRef<uint8_t> Data) { + // Compressed section with Elf_Chdr is the ELF standard. + if (Data.size() < sizeof(Elf_Chdr)) + fatal(toString(this) + ": corrupted compressed section"); + auto *Hdr = reinterpret_cast<const Elf_Chdr *>(Data.data()); + if (Hdr->ch_type != ELFCOMPRESS_ZLIB) + fatal(toString(this) + ": unsupported compression type"); + return {Data.slice(sizeof(*Hdr)), Hdr->ch_size}; +} + +// Returns compressed data and its size when uncompressed. +template <class ELFT> +std::pair<ArrayRef<uint8_t>, uint64_t> +InputSectionBase<ELFT>::getRawCompressedData(ArrayRef<uint8_t> Data) { + // Compressed sections without Elf_Chdr header contain this header + // instead. This is a GNU extension. + struct ZlibHeader { + char Magic[4]; // Should be "ZLIB" + char Size[8]; // Uncompressed size in big-endian + }; + + if (Data.size() < sizeof(ZlibHeader)) + fatal(toString(this) + ": corrupted compressed section"); + auto *Hdr = reinterpret_cast<const ZlibHeader *>(Data.data()); + if (memcmp(Hdr->Magic, "ZLIB", 4)) + fatal(toString(this) + ": broken ZLIB-compressed section"); + return {Data.slice(sizeof(*Hdr)), read64be(Hdr->Size)}; +} + +// Uncompress section contents. Note that this function is called +// from parallel_for_each, so it must be thread-safe. +template <class ELFT> void InputSectionBase<ELFT>::uncompress() { + if (!zlib::isAvailable()) + fatal(toString(this) + + ": build lld with zlib to enable compressed sections support"); + + // This section is compressed. Here we decompress it. 
Ideally, all + // compressed sections have SHF_COMPRESSED bit and their contents + // start with headers of Elf_Chdr type. However, sections whose + // names start with ".zdebug_" don't have the bit and contains a raw + // ZLIB-compressed data (which is a bad thing because section names + // shouldn't be significant in ELF.) We need to be able to read both. + ArrayRef<uint8_t> Buf; // Compressed data + size_t Size; // Uncompressed size + if (Flags & SHF_COMPRESSED) + std::tie(Buf, Size) = getElfCompressedData(Data); + else + std::tie(Buf, Size) = getRawCompressedData(Data); + + // Uncompress Buf. + char *OutputBuf; + { + static std::mutex Mu; + std::lock_guard<std::mutex> Lock(Mu); + OutputBuf = BAlloc.Allocate<char>(Size); + } + if (zlib::uncompress(toStringRef(Buf), OutputBuf, Size) != zlib::StatusOK) + fatal(toString(this) + ": error while uncompressing section"); + Data = ArrayRef<uint8_t>((uint8_t *)OutputBuf, Size); +} + +template <class ELFT> +typename ELFT::uint +InputSectionBase<ELFT>::getOffset(const DefinedRegular<ELFT> &Sym) const { + return getOffset(Sym.Value); +} + +template <class ELFT> +InputSectionBase<ELFT> *InputSectionBase<ELFT>::getLinkOrderDep() const { + if ((Flags & SHF_LINK_ORDER) && Link != 0) + return getFile()->getSections()[Link]; + return nullptr; +} + +// Returns a source location string. Used to construct an error message. +template <class ELFT> +std::string InputSectionBase<ELFT>::getLocation(typename ELFT::uint Offset) { + // First check if we can get desired values from debugging information. + std::string LineInfo = File->getLineInfo(this, Offset); + if (!LineInfo.empty()) + return LineInfo; + + // File->SourceFile contains STT_FILE symbol that contains a + // source file name. If it's missing, we use an object file name. + std::string SrcFile = File->SourceFile; + if (SrcFile.empty()) + SrcFile = toString(File); + + // Find a function symbol that encloses a given location. 
+ for (SymbolBody *B : File->getSymbols()) + if (auto *D = dyn_cast<DefinedRegular<ELFT>>(B)) + if (D->Section == this && D->Type == STT_FUNC) + if (D->Value <= Offset && Offset < D->Value + D->Size) + return SrcFile + ":(function " + toString(*D) + ")"; + + // If there's no symbol, print out the offset in the section. + return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str(); +} + +template <class ELFT> +InputSection<ELFT>::InputSection() : InputSectionBase<ELFT>() {} + +template <class ELFT> +InputSection<ELFT>::InputSection(uintX_t Flags, uint32_t Type, + uintX_t Addralign, ArrayRef<uint8_t> Data, + StringRef Name, Kind K) + : InputSectionBase<ELFT>(nullptr, Flags, Type, + /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Addralign, + Data, Name, K) {} + +template <class ELFT> +InputSection<ELFT>::InputSection(elf::ObjectFile<ELFT> *F, + const Elf_Shdr *Header, StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, Base::Regular) {} + +template <class ELFT> +bool InputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == Base::Regular || S->kind() == Base::Synthetic; +} + +template <class ELFT> +InputSectionBase<ELFT> *InputSection<ELFT>::getRelocatedSection() { + assert(this->Type == SHT_RELA || this->Type == SHT_REL); + ArrayRef<InputSectionBase<ELFT> *> Sections = this->File->getSections(); + return Sections[this->Info]; +} + +template <class ELFT> void InputSection<ELFT>::addThunk(const Thunk<ELFT> *T) { + Thunks.push_back(T); +} + +template <class ELFT> uint64_t InputSection<ELFT>::getThunkOff() const { + return this->Data.size(); +} + +template <class ELFT> uint64_t InputSection<ELFT>::getThunksSize() const { + uint64_t Total = 0; + for (const Thunk<ELFT> *T : Thunks) + Total += T->size(); + return Total; +} + +// This is used for -r. We can't use memcpy to copy relocations because we need +// to update symbol table offset and section index for each relocation. So we +// copy relocations one by one. 
+template <class ELFT> +template <class RelTy> +void InputSection<ELFT>::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) { + InputSectionBase<ELFT> *RelocatedSection = getRelocatedSection(); + + for (const RelTy &Rel : Rels) { + uint32_t Type = Rel.getType(Config->Mips64EL); + SymbolBody &Body = this->File->getRelocTargetSym(Rel); + + Elf_Rela *P = reinterpret_cast<Elf_Rela *>(Buf); + Buf += sizeof(RelTy); + + if (Config->Rela) + P->r_addend = getAddend<ELFT>(Rel); + P->r_offset = RelocatedSection->getOffset(Rel.r_offset); + P->setSymbolAndType(Body.DynsymIndex, Type, Config->Mips64EL); + } +} + +static uint32_t getARMUndefinedRelativeWeakVA(uint32_t Type, uint32_t A, + uint32_t P) { + switch (Type) { + case R_ARM_THM_JUMP11: + return P + 2; + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_PREL31: + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + return P + 4; + case R_ARM_THM_CALL: + // We don't want an interworking BLX to ARM + return P + 5; + default: + return A; + } +} + +static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t Type, uint64_t A, + uint64_t P) { + switch (Type) { + case R_AARCH64_CALL26: + case R_AARCH64_CONDBR19: + case R_AARCH64_JUMP26: + case R_AARCH64_TSTBR14: + return P + 4; + default: + return A; + } +} + +template <class ELFT> +static typename ELFT::uint +getRelocTargetVA(uint32_t Type, typename ELFT::uint A, typename ELFT::uint P, + const SymbolBody &Body, RelExpr Expr) { + switch (Expr) { + case R_HINT: + case R_TLSDESC_CALL: + llvm_unreachable("cannot relocate hint relocs"); + case R_TLSLD: + return In<ELFT>::Got->getTlsIndexOff() + A - In<ELFT>::Got->getSize(); + case R_TLSLD_PC: + return In<ELFT>::Got->getTlsIndexVA() + A - P; + case R_THUNK_ABS: + return Body.getThunkVA<ELFT>() + A; + case R_THUNK_PC: + case R_THUNK_PLT_PC: + return Body.getThunkVA<ELFT>() + A - P; + case R_PPC_TOC: + return getPPC64TocBase() + A; + case R_TLSGD: + return In<ELFT>::Got->getGlobalDynOffset(Body) + A 
- + In<ELFT>::Got->getSize(); + case R_TLSGD_PC: + return In<ELFT>::Got->getGlobalDynAddr(Body) + A - P; + case R_TLSDESC: + return In<ELFT>::Got->getGlobalDynAddr(Body) + A; + case R_TLSDESC_PAGE: + return getAArch64Page(In<ELFT>::Got->getGlobalDynAddr(Body) + A) - + getAArch64Page(P); + case R_PLT: + return Body.getPltVA<ELFT>() + A; + case R_PLT_PC: + case R_PPC_PLT_OPD: + return Body.getPltVA<ELFT>() + A - P; + case R_SIZE: + return Body.getSize<ELFT>() + A; + case R_GOTREL: + return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA(); + case R_GOTREL_FROM_END: + return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA() - + In<ELFT>::Got->getSize(); + case R_RELAX_TLS_GD_TO_IE_END: + case R_GOT_FROM_END: + return Body.getGotOffset<ELFT>() + A - In<ELFT>::Got->getSize(); + case R_RELAX_TLS_GD_TO_IE_ABS: + case R_GOT: + return Body.getGotVA<ELFT>() + A; + case R_RELAX_TLS_GD_TO_IE_PAGE_PC: + case R_GOT_PAGE_PC: + return getAArch64Page(Body.getGotVA<ELFT>() + A) - getAArch64Page(P); + case R_RELAX_TLS_GD_TO_IE: + case R_GOT_PC: + return Body.getGotVA<ELFT>() + A - P; + case R_GOTONLY_PC: + return In<ELFT>::Got->getVA() + A - P; + case R_GOTONLY_PC_FROM_END: + return In<ELFT>::Got->getVA() + A - P + In<ELFT>::Got->getSize(); + case R_RELAX_TLS_LD_TO_LE: + case R_RELAX_TLS_IE_TO_LE: + case R_RELAX_TLS_GD_TO_LE: + case R_TLS: + // A weak undefined TLS symbol resolves to the base of the TLS + // block, i.e. gets a value of zero. If we pass --gc-sections to + // lld and .tbss is not referenced, it gets reclaimed and we don't + // create a TLS program header. Therefore, we resolve this + // statically to zero. 
+ if (Body.isTls() && (Body.isLazy() || Body.isUndefined()) && + Body.symbol()->isWeak()) + return 0; + if (Target->TcbSize) + return Body.getVA<ELFT>(A) + + alignTo(Target->TcbSize, Out<ELFT>::TlsPhdr->p_align); + return Body.getVA<ELFT>(A) - Out<ELFT>::TlsPhdr->p_memsz; + case R_RELAX_TLS_GD_TO_LE_NEG: + case R_NEG_TLS: + return Out<ELF32LE>::TlsPhdr->p_memsz - Body.getVA<ELFT>(A); + case R_ABS: + case R_RELAX_GOT_PC_NOPIC: + return Body.getVA<ELFT>(A); + case R_GOT_OFF: + return Body.getGotOffset<ELFT>() + A; + case R_MIPS_GOT_LOCAL_PAGE: + // If relocation against MIPS local symbol requires GOT entry, this entry + // should be initialized by 'page address'. This address is high 16-bits + // of sum the symbol's value and the addend. + return In<ELFT>::MipsGot->getVA() + + In<ELFT>::MipsGot->getPageEntryOffset(Body, A) - + In<ELFT>::MipsGot->getGp(); + case R_MIPS_GOT_OFF: + case R_MIPS_GOT_OFF32: + // In case of MIPS if a GOT relocation has non-zero addend this addend + // should be applied to the GOT entry content not to the GOT entry offset. + // That is why we use separate expression type. + return In<ELFT>::MipsGot->getVA() + + In<ELFT>::MipsGot->getBodyEntryOffset(Body, A) - + In<ELFT>::MipsGot->getGp(); + case R_MIPS_GOTREL: + return Body.getVA<ELFT>(A) - In<ELFT>::MipsGot->getGp(); + case R_MIPS_TLSGD: + return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + + In<ELFT>::MipsGot->getGlobalDynOffset(Body) - + In<ELFT>::MipsGot->getGp(); + case R_MIPS_TLSLD: + return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + + In<ELFT>::MipsGot->getTlsIndexOff() - In<ELFT>::MipsGot->getGp(); + case R_PPC_OPD: { + uint64_t SymVA = Body.getVA<ELFT>(A); + // If we have an undefined weak symbol, we might get here with a symbol + // address of zero. That could overflow, but the code must be unreachable, + // so don't bother doing anything at all. 
+ if (!SymVA) + return 0; + if (Out<ELF64BE>::Opd) { + // If this is a local call, and we currently have the address of a + // function-descriptor, get the underlying code address instead. + uint64_t OpdStart = Out<ELF64BE>::Opd->Addr; + uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->Size; + bool InOpd = OpdStart <= SymVA && SymVA < OpdEnd; + if (InOpd) + SymVA = read64be(&Out<ELF64BE>::OpdBuf[SymVA - OpdStart]); + } + return SymVA - P; + } + case R_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) { + // On ARM and AArch64 a branch to an undefined weak resolves to the + // next instruction, otherwise the place. + if (Config->EMachine == EM_ARM) + return getARMUndefinedRelativeWeakVA(Type, A, P); + if (Config->EMachine == EM_AARCH64) + return getAArch64UndefinedRelativeWeakVA(Type, A, P); + } + case R_RELAX_GOT_PC: + return Body.getVA<ELFT>(A) - P; + case R_PLT_PAGE_PC: + case R_PAGE_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) + return getAArch64Page(A); + return getAArch64Page(Body.getVA<ELFT>(A)) - getAArch64Page(P); + } + llvm_unreachable("Invalid expression"); +} + +// This function applies relocations to sections without SHF_ALLOC bit. +// Such sections are never mapped to memory at runtime. Debug sections are +// an example. Relocations in non-alloc sections are much easier to +// handle than in allocated sections because it will never need complex +// treatement such as GOT or PLT (because at runtime no one refers them). +// So, we handle relocations for non-alloc sections directly in this +// function as a performance optimization. 
// Buf is the buffer the relocations are applied into and Rels is the list of
// REL or RELA records to process. Processing stops, after reporting an error,
// at the first relocation whose expression is not R_ABS.
template <class ELFT>
template <class RelTy>
void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) {
  for (const RelTy &Rel : Rels) {
    uint32_t Type = Rel.getType(Config->Mips64EL);
    uintX_t Offset = this->getOffset(Rel.r_offset);
    uint8_t *BufLoc = Buf + Offset;
    uintX_t Addend = getAddend<ELFT>(Rel);
    // For non-RELA records, also add the implicit addend that the target
    // derives from the location being relocated.
    if (!RelTy::IsRela)
      Addend += Target->getImplicitAddend(BufLoc, Type);

    SymbolBody &Sym = this->File->getRelocTargetSym(Rel);
    // Only R_ABS expressions are handled here; report anything else and give
    // up on the remaining relocations.
    if (Target->getRelExpr(Type, Sym) != R_ABS) {
      error(this->getLocation(Offset) + ": has non-ABS reloc");
      return;
    }

    uintX_t AddrLoc = this->OutSec->Addr + Offset;
    // A TLS symbol is resolved only if a TLS program header exists;
    // otherwise its value stays zero.
    uint64_t SymVA = 0;
    if (!Sym.isTls() || Out<ELFT>::TlsPhdr)
      SymVA = SignExtend64<sizeof(uintX_t) * 8>(
          getRelocTargetVA<ELFT>(Type, Addend, AddrLoc, Sym, R_ABS));
    Target->relocateOne(BufLoc, Type, SymVA);
  }
}

// Applies relocations to this section's contents in Buf. BufEnd is used only
// to check that the instruction patched for R_PPC_PLT_OPD lies inside the
// buffer.
template <class ELFT>
void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd) {
  // scanReloc function in Writer.cpp constructs Relocations
  // vector only for SHF_ALLOC'ed sections. For other sections,
  // we handle relocations directly here.
  auto *IS = dyn_cast<InputSection<ELFT>>(this);
  if (IS && !(IS->Flags & SHF_ALLOC)) {
    if (IS->AreRelocsRela)
      IS->relocateNonAlloc(Buf, IS->relas());
    else
      IS->relocateNonAlloc(Buf, IS->rels());
    return;
  }

  const unsigned Bits = sizeof(uintX_t) * 8;
  for (const Relocation &Rel : Relocations) {
    uintX_t Offset = getOffset(Rel.Offset);
    uint8_t *BufLoc = Buf + Offset;
    uint32_t Type = Rel.Type;
    uintX_t A = Rel.Addend;

    uintX_t AddrLoc = OutSec->Addr + Offset;
    RelExpr Expr = Rel.Expr;
    // Sign-extend the computed value from the ELF word size to 64 bits.
    uint64_t TargetVA = SignExtend64<Bits>(
        getRelocTargetVA<ELFT>(Type, A, AddrLoc, *Rel.Sym, Expr));

    // Relaxed expressions are rewritten by the target's relax* hooks;
    // everything else goes through relocateOne.
    switch (Expr) {
    case R_RELAX_GOT_PC:
    case R_RELAX_GOT_PC_NOPIC:
      Target->relaxGot(BufLoc, TargetVA);
      break;
    case R_RELAX_TLS_IE_TO_LE:
      Target->relaxTlsIeToLe(BufLoc, Type, TargetVA);
      break;
    case R_RELAX_TLS_LD_TO_LE:
      Target->relaxTlsLdToLe(BufLoc, Type, TargetVA);
      break;
    case R_RELAX_TLS_GD_TO_LE:
    case R_RELAX_TLS_GD_TO_LE_NEG:
      Target->relaxTlsGdToLe(BufLoc, Type, TargetVA);
      break;
    case R_RELAX_TLS_GD_TO_IE:
    case R_RELAX_TLS_GD_TO_IE_ABS:
    case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
    case R_RELAX_TLS_GD_TO_IE_END:
      Target->relaxTlsGdToIe(BufLoc, Type, TargetVA);
      break;
    case R_PPC_PLT_OPD:
      // Patch a nop (0x60000000) to a ld.
      // The nop is looked for in the word that follows the relocated one,
      // and only if both words lie before BufEnd.
      if (BufLoc + 8 <= BufEnd && read32be(BufLoc + 4) == 0x60000000)
        write32be(BufLoc + 4, 0xe8410028); // ld %r2, 40(%r1)
      // fallthrough
      // (intentional: the relocation itself is still applied below)
    default:
      Target->relocateOne(BufLoc, Type, TargetVA);
      break;
    }
  }
}

// Emits this section at offset OutSecOff from Buf. SHT_NOBITS sections write
// nothing, synthetic sections write themselves, and SHT_RELA/SHT_REL sections
// are emitted through copyRelocations. Any other section is copied verbatim,
// relocated, and followed by its thunks, if any.
template <class ELFT> void InputSection<ELFT>::writeTo(uint8_t *Buf) {
  if (this->Type == SHT_NOBITS)
    return;

  if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) {
    S->writeTo(Buf + OutSecOff);
    return;
  }

  // If -r is given, then an InputSection may be a relocation section.
  if (this->Type == SHT_RELA) {
    copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rela>());
    return;
  }
  if (this->Type == SHT_REL) {
    copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rel>());
    return;
  }

  // Copy section contents from source object file to output file.
  ArrayRef<uint8_t> Data = this->Data;
  memcpy(Buf + OutSecOff, Data.data(), Data.size());

  // Iterate over all relocation sections that apply to this section.
  // Note that relocate() receives Buf itself, not Buf + OutSecOff.
  uint8_t *BufEnd = Buf + OutSecOff + Data.size();
  this->relocate(Buf, BufEnd);

  // The section might have data/code generated by the linker that needs
  // to be written after the section. Usually these are thunks - small pieces
  // of code used to jump between "incompatible" functions like PIC and non-PIC
  // or used when the jump target is too far and its address does not fit
  // into the short jump instruction.
  if (!Thunks.empty()) {
    Buf += OutSecOff + getThunkOff();
    for (const Thunk<ELFT> *T : Thunks) {
      T->writeTo(Buf);
      Buf += T->size();
    }
  }
}

// Merges Other into this section: this section takes the larger of the two
// alignments, Other's Repl is redirected to this section's Repl, and Other is
// marked as not live.
template <class ELFT>
void InputSection<ELFT>::replace(InputSection<ELFT> *Other) {
  this->Alignment = std::max(this->Alignment, Other->Alignment);
  Other->Repl = this->Repl;
  Other->Live = false;
}

// Constructs an input section of kind EHFrame from a section header.
template <class ELFT>
EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F,
                                     const Elf_Shdr *Header, StringRef Name)
    : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::EHFrame) {
  // Mark .eh_frame sections as live by default because there are
  // usually no relocations that point to .eh_frames. Otherwise,
  // the garbage collector would drop all .eh_frame sections.
  this->Live = true;
}

// Kind test: true if S is an EHFrame section.
template <class ELFT>
bool EhInputSection<ELFT>::classof(const InputSectionData *S) {
  return S->kind() == InputSectionBase<ELFT>::EHFrame;
}

// Returns the index of the first relocation that points to a region between
// Begin and Begin+Size.
+template <class IntTy, class RelTy> +static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels, + unsigned &RelocI) { + // Start search from RelocI for fast access. That works because the + // relocations are sorted in .eh_frame. + for (unsigned N = Rels.size(); RelocI < N; ++RelocI) { + const RelTy &Rel = Rels[RelocI]; + if (Rel.r_offset < Begin) + continue; + + if (Rel.r_offset < Begin + Size) + return RelocI; + return -1; + } + return -1; +} + +// .eh_frame is a sequence of CIE or FDE records. +// This function splits an input section into records and returns them. +template <class ELFT> void EhInputSection<ELFT>::split() { + // Early exit if already split. + if (!this->Pieces.empty()) + return; + + if (this->NumRelocations) { + if (this->AreRelocsRela) + split(this->relas()); + else + split(this->rels()); + return; + } + split(makeArrayRef<typename ELFT::Rela>(nullptr, nullptr)); +} + +template <class ELFT> +template <class RelTy> +void EhInputSection<ELFT>::split(ArrayRef<RelTy> Rels) { + ArrayRef<uint8_t> Data = this->Data; + unsigned RelI = 0; + for (size_t Off = 0, End = Data.size(); Off != End;) { + size_t Size = readEhRecordSize<ELFT>(this, Off); + this->Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI)); + // The empty record is the end marker. + if (Size == 4) + break; + Off += Size; + } +} + +static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) { + // Optimize the common case. + StringRef S((const char *)A.data(), A.size()); + if (EntSize == 1) + return S.find(0); + + for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { + const char *B = S.begin() + I; + if (std::all_of(B, B + EntSize, [](char C) { return C == 0; })) + return I; + } + return StringRef::npos; +} + +// Split SHF_STRINGS section. Such section is a sequence of +// null-terminated strings. 
+template <class ELFT> +void MergeInputSection<ELFT>::splitStrings(ArrayRef<uint8_t> Data, + size_t EntSize) { + size_t Off = 0; + bool IsAlloc = this->Flags & SHF_ALLOC; + while (!Data.empty()) { + size_t End = findNull(Data, EntSize); + if (End == StringRef::npos) + fatal(toString(this) + ": string is not null terminated"); + size_t Size = End + EntSize; + Pieces.emplace_back(Off, !IsAlloc); + Hashes.push_back(hash_value(toStringRef(Data.slice(0, Size)))); + Data = Data.slice(Size); + Off += Size; + } +} + +// Split non-SHF_STRINGS section. Such section is a sequence of +// fixed size records. +template <class ELFT> +void MergeInputSection<ELFT>::splitNonStrings(ArrayRef<uint8_t> Data, + size_t EntSize) { + size_t Size = Data.size(); + assert((Size % EntSize) == 0); + bool IsAlloc = this->Flags & SHF_ALLOC; + for (unsigned I = 0, N = Size; I != N; I += EntSize) { + Hashes.push_back(hash_value(toStringRef(Data.slice(I, EntSize)))); + Pieces.emplace_back(I, !IsAlloc); + } +} + +template <class ELFT> +MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F, + const Elf_Shdr *Header, + StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::Merge) {} + +// This function is called after we obtain a complete list of input sections +// that need to be linked. This is responsible to split section contents +// into small chunks for further processing. +// +// Note that this function is called from parallel_for_each. This must be +// thread-safe (i.e. no memory allocation from the pools). 
+template <class ELFT> void MergeInputSection<ELFT>::splitIntoPieces() { + ArrayRef<uint8_t> Data = this->Data; + uintX_t EntSize = this->Entsize; + if (this->Flags & SHF_STRINGS) + splitStrings(Data, EntSize); + else + splitNonStrings(Data, EntSize); + + if (Config->GcSections && (this->Flags & SHF_ALLOC)) + for (uintX_t Off : LiveOffsets) + this->getSectionPiece(Off)->Live = true; +} + +template <class ELFT> +bool MergeInputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == InputSectionBase<ELFT>::Merge; +} + +// Do binary search to get a section piece at a given input offset. +template <class ELFT> +SectionPiece *MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) { + auto *This = static_cast<const MergeInputSection<ELFT> *>(this); + return const_cast<SectionPiece *>(This->getSectionPiece(Offset)); +} + +template <class It, class T, class Compare> +static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) { + size_t Size = std::distance(First, Last); + assert(Size != 0); + while (Size != 1) { + size_t H = Size / 2; + const It MI = First + H; + Size -= H; + First = Comp(Value, *MI) ? First : First + H; + } + return Comp(Value, *First) ? First : First + 1; +} + +template <class ELFT> +const SectionPiece * +MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { + uintX_t Size = this->Data.size(); + if (Offset >= Size) + fatal(toString(this) + ": entry is past the end of the section"); + + // Find the element this offset points to. + auto I = fastUpperBound( + Pieces.begin(), Pieces.end(), Offset, + [](const uintX_t &A, const SectionPiece &B) { return A < B.InputOff; }); + --I; + return &*I; +} + +// Returns the offset in an output section for a given input offset. +// Because contents of a mergeable section is not contiguous in output, +// it is not just an addition to a base output offset. 
+template <class ELFT> +typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) const { + // Initialize OffsetMap lazily. + std::call_once(InitOffsetMap, [&] { + OffsetMap.reserve(Pieces.size()); + for (const SectionPiece &Piece : Pieces) + OffsetMap[Piece.InputOff] = Piece.OutputOff; + }); + + // Find a string starting at a given offset. + auto It = OffsetMap.find(Offset); + if (It != OffsetMap.end()) + return It->second; + + if (!this->Live) + return 0; + + // If Offset is not at beginning of a section piece, it is not in the map. + // In that case we need to search from the original section piece vector. + const SectionPiece &Piece = *this->getSectionPiece(Offset); + if (!Piece.Live) + return 0; + + uintX_t Addend = Offset - Piece.InputOff; + return Piece.OutputOff + Addend; +} + +template class elf::InputSectionBase<ELF32LE>; +template class elf::InputSectionBase<ELF32BE>; +template class elf::InputSectionBase<ELF64LE>; +template class elf::InputSectionBase<ELF64BE>; + +template class elf::InputSection<ELF32LE>; +template class elf::InputSection<ELF32BE>; +template class elf::InputSection<ELF64LE>; +template class elf::InputSection<ELF64BE>; + +template class elf::EhInputSection<ELF32LE>; +template class elf::EhInputSection<ELF32BE>; +template class elf::EhInputSection<ELF64LE>; +template class elf::EhInputSection<ELF64BE>; + +template class elf::MergeInputSection<ELF32LE>; +template class elf::MergeInputSection<ELF32BE>; +template class elf::MergeInputSection<ELF64LE>; +template class elf::MergeInputSection<ELF64BE>; + +template std::string lld::toString(const InputSectionBase<ELF32LE> *); +template std::string lld::toString(const InputSectionBase<ELF32BE> *); +template std::string lld::toString(const InputSectionBase<ELF64LE> *); +template std::string lld::toString(const InputSectionBase<ELF64BE> *); diff --git a/contrib/llvm/tools/lld/ELF/InputSection.h b/contrib/llvm/tools/lld/ELF/InputSection.h new file mode 100644 index 
000000000000..fc7a7fb60973 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/InputSection.h @@ -0,0 +1,326 @@
//===- InputSection.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_INPUT_SECTION_H
#define LLD_ELF_INPUT_SECTION_H

#include "Config.h"
#include "Relocations.h"
#include "Thunks.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Object/ELF.h"
#include <mutex>

namespace lld {
namespace elf {

class DefinedCommon;
class SymbolBody;
struct SectionPiece;

template <class ELFT> class DefinedRegular;
template <class ELFT> class ObjectFile;
template <class ELFT> class OutputSection;
class OutputSectionBase;

// We need a non-template input section class to store symbol layout
// in linker script parser structures, where we do not have the ELFT
// template parameter. For each scripted output section symbol we
// store a pointer to the preceding InputSectionData object, or nullptr
// if the symbol should be placed at the very beginning of the output
// section.
class InputSectionData {
public:
  // Discriminator reported by kind(); the classof() functions of the derived
  // section classes compare against it.
  enum Kind { Regular, EHFrame, Merge, Synthetic, };

  // The garbage collector sets sections' Live bits.
  // If GC is disabled, all sections are considered live by default.
  // Note that this constructor leaves Alignment unset.
  InputSectionData(Kind SectionKind, StringRef Name, ArrayRef<uint8_t> Data,
                   bool Live)
      : SectionKind(SectionKind), Live(Live), Assigned(false), Name(Name),
        Data(Data) {}

private:
  // Stored as a 3-bit field; read it through kind().
  unsigned SectionKind : 3;

public:
  Kind kind() const { return (Kind)SectionKind; }

  unsigned Live : 1; // for garbage collection
  unsigned Assigned : 1; // for linker script
  uint32_t Alignment;
  StringRef Name;
  ArrayRef<uint8_t> Data;

  // Reinterprets the section contents as an array of T. The data size is
  // asserted to be a multiple of sizeof(T).
  template <typename T> llvm::ArrayRef<T> getDataAs() const {
    size_t S = Data.size();
    assert(S % sizeof(T) == 0);
    return llvm::makeArrayRef<T>((const T *)Data.data(), S / sizeof(T));
  }

  std::vector<Relocation> Relocations;
};

// This corresponds to a section of an input file.
template <class ELFT> class InputSectionBase : public InputSectionData {
protected:
  typedef typename ELFT::Chdr Elf_Chdr;
  typedef typename ELFT::Rel Elf_Rel;
  typedef typename ELFT::Rela Elf_Rela;
  typedef typename ELFT::Shdr Elf_Shdr;
  typedef typename ELFT::Sym Elf_Sym;
  typedef typename ELFT::uint uintX_t;

  // The file this section is from.
  ObjectFile<ELFT> *File;

public:
  // These correspond to the fields in Elf_Shdr.
  uintX_t Flags;
  uintX_t Offset = 0;
  uintX_t Entsize;
  uint32_t Type;
  uint32_t Link;
  uint32_t Info;

  // Creates an empty, non-live section of kind Regular whose Repl points to
  // itself. The two bit-fields are assigned in the body.
  InputSectionBase()
      : InputSectionData(Regular, "", ArrayRef<uint8_t>(), false), Repl(this) {
    NumRelocations = 0;
    AreRelocsRela = false;
  }

  InputSectionBase(ObjectFile<ELFT> *File, const Elf_Shdr *Header,
                   StringRef Name, Kind SectionKind);
  InputSectionBase(ObjectFile<ELFT> *File, uintX_t Flags, uint32_t Type,
                   uintX_t Entsize, uint32_t Link, uint32_t Info,
                   uintX_t Addralign, ArrayRef<uint8_t> Data, StringRef Name,
                   Kind SectionKind);
  OutputSectionBase *OutSec = nullptr;

  // Relocations that refer to this section.
  // FirstRelocation points at NumRelocations records; AreRelocsRela tells
  // whether they must be read through rels() or relas().
  const Elf_Rel *FirstRelocation = nullptr;
  unsigned NumRelocations : 31;
  unsigned AreRelocsRela : 1;
  // Returns the relocations as REL records; asserts they are not RELA.
  ArrayRef<Elf_Rel> rels() const {
    assert(!AreRelocsRela);
    return llvm::makeArrayRef(FirstRelocation, NumRelocations);
  }
  // Returns the relocations as RELA records; asserts they are RELA.
  ArrayRef<Elf_Rela> relas() const {
    assert(AreRelocsRela);
    return llvm::makeArrayRef(static_cast<const Elf_Rela *>(FirstRelocation),
                              NumRelocations);
  }

  // This pointer points to the "real" instance of this instance.
  // Usually Repl == this. However, if ICF merges two sections,
  // Repl pointer of one section points to another section. So,
  // if you need to get a pointer to this instance, do not use
  // this but instead this->Repl.
  InputSectionBase<ELFT> *Repl;

  // Returns the size of this section (even if this is a common or BSS.)
  size_t getSize() const;

  ObjectFile<ELFT> *getFile() const { return File; }
  llvm::object::ELFFile<ELFT> getObj() const { return File->getObj(); }
  uintX_t getOffset(const DefinedRegular<ELFT> &Sym) const;
  InputSectionBase *getLinkOrderDep() const;
  // Translate an offset in the input section to an offset in the output
  // section.
  uintX_t getOffset(uintX_t Offset) const;

  // ELF supports ZLIB-compressed section.
  // Returns true if the section is compressed.
  bool isCompressed() const;
  void uncompress();

  // Returns a source location string. Used to construct an error message.
  std::string getLocation(uintX_t Offset);

  // Applies relocations to the section contents in Buf; BufEnd marks the end
  // of the data that may be patched.
  void relocate(uint8_t *Buf, uint8_t *BufEnd);

private:
  std::pair<ArrayRef<uint8_t>, uint64_t>
  getElfCompressedData(ArrayRef<uint8_t> Data);

  std::pair<ArrayRef<uint8_t>, uint64_t>
  getRawCompressedData(ArrayRef<uint8_t> Data);
};

// SectionPiece represents a piece of splittable section contents.
// We allocate a lot of these and binary search on them. This means that they
// have to be as compact as possible, which is why we don't store the size (can
// be found by looking at the next one) and put the hash in a side table.
struct SectionPiece {
  // A new piece has no output offset yet (OutputOff == -1). When section GC
  // is disabled, every piece is live regardless of the Live argument.
  SectionPiece(size_t Off, bool Live = false)
      : InputOff(Off), OutputOff(-1), Live(Live || !Config->GcSections) {}

  size_t InputOff;
  // OutputOff takes all but one bit of a ssize_t and Live takes one bit; the
  // static_assert below checks that the struct stays two words in size.
  ssize_t OutputOff : 8 * sizeof(ssize_t) - 1;
  size_t Live : 1;
};
static_assert(sizeof(SectionPiece) == 2 * sizeof(size_t),
              "SectionPiece is too big");

// This corresponds to a SHF_MERGE section of an input file.
template <class ELFT> class MergeInputSection : public InputSectionBase<ELFT> {
  typedef typename ELFT::uint uintX_t;
  typedef typename ELFT::Sym Elf_Sym;
  typedef typename ELFT::Shdr Elf_Shdr;

public:
  MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header,
                    StringRef Name);
  static bool classof(const InputSectionData *S);
  // Splits the section contents into Pieces and fills in their hashes.
  void splitIntoPieces();

  // Mark the piece at a given offset live. Used by GC.
  // The offset is only recorded here; splitIntoPieces() applies it to the
  // piece. Asserts that the section is SHF_ALLOC.
  void markLiveAt(uintX_t Offset) {
    assert(this->Flags & llvm::ELF::SHF_ALLOC);
    LiveOffsets.insert(Offset);
  }

  // Translate an offset in the input section to an offset
  // in the output section.
  uintX_t getOffset(uintX_t Offset) const;

  // Splittable sections are handled as a sequence of data
  // rather than a single large blob of data.
  std::vector<SectionPiece> Pieces;

  // Returns I'th piece's data. This function is very hot when
  // string merging is enabled, so we want to inline.
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  llvm::CachedHashStringRef getData(size_t I) const {
    size_t Begin = Pieces[I].InputOff;
    // A piece ends where the next one starts; the last piece ends at the end
    // of the section data.
    size_t End;
    if (Pieces.size() - 1 == I)
      End = this->Data.size();
    else
      End = Pieces[I + 1].InputOff;

    StringRef S = {(const char *)(this->Data.data() + Begin), End - Begin};
    return {S, Hashes[I]};
  }

  // Returns the SectionPiece at a given input section offset.
  SectionPiece *getSectionPiece(uintX_t Offset);
  const SectionPiece *getSectionPiece(uintX_t Offset) const;

private:
  void splitStrings(ArrayRef<uint8_t> A, size_t Size);
  void splitNonStrings(ArrayRef<uint8_t> A, size_t Size);

  // Hash of each piece's contents; Hashes[I] belongs to Pieces[I].
  std::vector<uint32_t> Hashes;

  // Table from piece start offsets to output offsets. getOffset() builds it
  // once, guarded by InitOffsetMap.
  mutable llvm::DenseMap<uintX_t, uintX_t> OffsetMap;
  mutable std::once_flag InitOffsetMap;

  // Offsets recorded by markLiveAt().
  llvm::DenseSet<uintX_t> LiveOffsets;
};

// A piece that additionally records the section data it belongs to, its size
// and a relocation index.
struct EhSectionPiece : public SectionPiece {
  EhSectionPiece(size_t Off, InputSectionData *ID, uint32_t Size,
                 unsigned FirstRelocation)
      : SectionPiece(Off, false), ID(ID), Size(Size),
        FirstRelocation(FirstRelocation) {}
  InputSectionData *ID;
  uint32_t Size;
  uint32_t size() const { return Size; }

  // The Size bytes of ID's data that start at InputOff.
  ArrayRef<uint8_t> data() { return {ID->Data.data() + this->InputOff, Size}; }
  unsigned FirstRelocation;
};

// This corresponds to a .eh_frame section of an input file.
template <class ELFT> class EhInputSection : public InputSectionBase<ELFT> {
public:
  typedef typename ELFT::Shdr Elf_Shdr;
  typedef typename ELFT::uint uintX_t;
  EhInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name);
  static bool classof(const InputSectionData *S);
  // Splits the section into Pieces.
  void split();
  template <class RelTy> void split(ArrayRef<RelTy> Rels);

  // Splittable sections are handled as a sequence of data
  // rather than a single large blob of data.
  std::vector<EhSectionPiece> Pieces;
};

// This corresponds to a non SHF_MERGE section of an input file.
template <class ELFT> class InputSection : public InputSectionBase<ELFT> {
  typedef InputSectionBase<ELFT> Base;
  typedef typename ELFT::Shdr Elf_Shdr;
  typedef typename ELFT::Rela Elf_Rela;
  typedef typename ELFT::Rel Elf_Rel;
  typedef typename ELFT::Sym Elf_Sym;
  typedef typename ELFT::uint uintX_t;
  typedef InputSectionData::Kind Kind;

public:
  InputSection();
  InputSection(uintX_t Flags, uint32_t Type, uintX_t Addralign,
               ArrayRef<uint8_t> Data, StringRef Name,
               Kind K = InputSectionData::Regular);
  InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name);

  // Shared instance, defined right after this class.
  static InputSection<ELFT> Discarded;

  // Write this section to a mmap'ed file, assuming Buf is pointing to
  // beginning of the output section.
  void writeTo(uint8_t *Buf);

  // The offset from beginning of the output sections this section was assigned
  // to. The writer sets a value.
  uint64_t OutSecOff = 0;

  // InputSection that is dependent on us (reverse dependency for GC)
  InputSectionBase<ELFT> *DependentSection = nullptr;

  static bool classof(const InputSectionData *S);

  InputSectionBase<ELFT> *getRelocatedSection();

  // Register thunk related to the symbol. When the section is written
  // to a mmap'ed file, target is requested to write an actual thunk code.
  // Now thunks are supported for MIPS and ARM targets only.
  void addThunk(const Thunk<ELFT> *T);

  // The offset of synthetic thunk code from beginning of this section.
  uint64_t getThunkOff() const;

  // Size of chunk with thunks code.
  uint64_t getThunksSize() const;

  // Applies the relocations in Rels directly to the section contents in Buf.
  template <class RelTy>
  void relocateNonAlloc(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);

  // Used by ICF.
  uint32_t Class[2] = {0, 0};

  // Called by ICF to merge two input sections.
  void replace(InputSection<ELFT> *Other);

private:
  template <class RelTy>
  void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);

  // Thunks registered through addThunk().
  llvm::TinyPtrVector<const Thunk<ELFT> *> Thunks;
};

template <class ELFT> InputSection<ELFT> InputSection<ELFT>::Discarded;
} // namespace elf

template <class ELFT> std::string toString(const elf::InputSectionBase<ELFT> *);
} // namespace lld

#endif
diff --git a/contrib/llvm/tools/lld/ELF/LTO.cpp b/contrib/llvm/tools/lld/ELF/LTO.cpp new file mode 100644 index 000000000000..b342b6195f1d --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/LTO.cpp @@ -0,0 +1,164 @@
//===- LTO.cpp ------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "LTO.h"
#include "Config.h"
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
#include <memory>
#include <string>
#include <system_error>
#include <vector>

using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;

using namespace lld;
using namespace lld::elf;

// This is for use when debugging LTO.
+static void saveBuffer(StringRef Buffer, const Twine &Path) { + std::error_code EC; + raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None); + if (EC) + error(EC, "cannot create " + Path); + OS << Buffer; +} + +static void diagnosticHandler(const DiagnosticInfo &DI) { + SmallString<128> ErrStorage; + raw_svector_ostream OS(ErrStorage); + DiagnosticPrinterRawOStream DP(OS); + DI.print(DP); + warn(ErrStorage); +} + +static void checkError(Error E) { + handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) -> Error { + error(EIB.message()); + return Error::success(); + }); +} + +static std::unique_ptr<lto::LTO> createLTO() { + lto::Config Conf; + + // LLD supports the new relocations. + Conf.Options = InitTargetOptionsFromCodeGenFlags(); + Conf.Options.RelaxELFRelocations = true; + + Conf.RelocModel = Config->Pic ? Reloc::PIC_ : Reloc::Static; + Conf.DisableVerify = Config->DisableVerify; + Conf.DiagHandler = diagnosticHandler; + Conf.OptLevel = Config->LTOO; + + // Set up a custom pipeline if we've been asked to. 
+ Conf.OptPipeline = Config->LTONewPmPasses; + Conf.AAPipeline = Config->LTOAAPipeline; + + if (Config->SaveTemps) + checkError(Conf.addSaveTemps(std::string(Config->OutputFile) + ".", + /*UseInputModulePath*/ true)); + + lto::ThinBackend Backend; + if (Config->ThinLTOJobs != -1u) + Backend = lto::createInProcessThinBackend(Config->ThinLTOJobs); + return llvm::make_unique<lto::LTO>(std::move(Conf), Backend, + Config->LTOPartitions); +} + +BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {} + +BitcodeCompiler::~BitcodeCompiler() = default; + +template <class ELFT> static void undefine(Symbol *S) { + replaceBody<Undefined<ELFT>>(S, S->body()->getName(), /*IsLocal=*/false, + STV_DEFAULT, S->body()->Type, nullptr); +} + +template <class ELFT> void BitcodeCompiler::add(BitcodeFile &F) { + lto::InputFile &Obj = *F.Obj; + unsigned SymNum = 0; + std::vector<Symbol *> Syms = F.getSymbols(); + std::vector<lto::SymbolResolution> Resols(Syms.size()); + + // Provide a resolution to the LTO API for each symbol. + for (const lto::InputFile::Symbol &ObjSym : Obj.symbols()) { + Symbol *Sym = Syms[SymNum]; + lto::SymbolResolution &R = Resols[SymNum]; + ++SymNum; + SymbolBody *B = Sym->body(); + + // Ideally we shouldn't check for SF_Undefined but currently IRObjectFile + // reports two symbols for module ASM defined. Without this check, lld + // flags an undefined in IR with a definition in ASM as prevailing. + // Once IRObjectFile is fixed to report only one symbol this hack can + // be removed. + R.Prevailing = + !(ObjSym.getFlags() & object::BasicSymbolRef::SF_Undefined) && + B->File == &F; + + R.VisibleToRegularObj = + Sym->IsUsedInRegularObj || (R.Prevailing && Sym->includeInDynsym()); + if (R.Prevailing) + undefine<ELFT>(Sym); + } + checkError(LTOObj->add(std::move(F.Obj), Resols)); +} + +// Merge all the bitcode files we have seen, codegen the result +// and return the resulting ObjectFile(s). 
+std::vector<InputFile *> BitcodeCompiler::compile() { + std::vector<InputFile *> Ret; + unsigned MaxTasks = LTOObj->getMaxTasks(); + Buff.resize(MaxTasks); + + checkError(LTOObj->run([&](size_t Task) { + return llvm::make_unique<lto::NativeObjectStream>( + llvm::make_unique<raw_svector_ostream>(Buff[Task])); + })); + + for (unsigned I = 0; I != MaxTasks; ++I) { + if (Buff[I].empty()) + continue; + if (Config->SaveTemps) { + if (MaxTasks == 1) + saveBuffer(Buff[I], Config->OutputFile + ".lto.o"); + else + saveBuffer(Buff[I], Config->OutputFile + Twine(I) + ".lto.o"); + } + InputFile *Obj = createObjectFile(MemoryBufferRef(Buff[I], "lto.tmp")); + Ret.push_back(Obj); + } + return Ret; +} + +template void BitcodeCompiler::template add<ELF32LE>(BitcodeFile &); +template void BitcodeCompiler::template add<ELF32BE>(BitcodeFile &); +template void BitcodeCompiler::template add<ELF64LE>(BitcodeFile &); +template void BitcodeCompiler::template add<ELF64BE>(BitcodeFile &); diff --git a/contrib/llvm/tools/lld/ELF/LTO.h b/contrib/llvm/tools/lld/ELF/LTO.h new file mode 100644 index 000000000000..3cb763650e1c --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/LTO.h @@ -0,0 +1,56 @@ +//===- LTO.h ----------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides a way to combine bitcode files into one ELF +// file by compiling them using LLVM. +// +// If LTO is in use, your input files are not in regular ELF files +// but instead LLVM bitcode files. In that case, the linker has to +// convert bitcode files into the native format so that we can create +// an ELF file that contains native code. This file provides that +// functionality. 
//
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_LTO_H
#define LLD_ELF_LTO_H

#include "lld/Core/LLVM.h"
#include "llvm/ADT/SmallString.h"
#include <memory>
#include <vector>

namespace llvm {
namespace lto {
class LTO;
}
}

namespace lld {
namespace elf {

class BitcodeFile;
class InputFile;

// Collects bitcode files and compiles them into native object files.
class BitcodeCompiler {
public:
  BitcodeCompiler();
  ~BitcodeCompiler();

  // Adds the bitcode file F to the set of files to be compiled.
  template <class ELFT> void add(BitcodeFile &F);
  // Compiles everything added so far and returns the resulting input files.
  std::vector<InputFile *> compile();

private:
  std::unique_ptr<llvm::lto::LTO> LTOObj;
  // Buffers that receive the generated native objects.
  std::vector<SmallString<0>> Buff;
};
}
}

#endif
diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp new file mode 100644 index 000000000000..59ef36c87de5 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp @@ -0,0 +1,1966 @@
//===- LinkerScript.cpp ---------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the parser/evaluator of the linker script.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LinkerScript.h"
+#include "Config.h"
+#include "Driver.h"
+#include "InputSection.h"
+#include "Memory.h"
+#include "OutputSections.h"
+#include "ScriptParser.h"
+#include "Strings.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Writer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::elf;
+
+LinkerScriptBase *elf::ScriptBase;
+ScriptConfiguration *elf::ScriptConfig;
+
+// Calls Symtab::addUndefined for Cmd->Name and then replaces the symbol's
+// body with a DefinedRegular of type STT_NOTYPE. The symbol is STV_HIDDEN
+// when Cmd->Hidden is set, STV_DEFAULT otherwise. Returns the new body.
+template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) {
+  uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
+  Symbol *Sym = Symtab<ELFT>::X->addUndefined(
+      Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility,
+      /*Type*/ 0,
+      /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
+
+  replaceBody<DefinedRegular<ELFT>>(Sym, Cmd->Name, /*IsLocal=*/false,
+                                    Visibility, STT_NOTYPE, 0, 0, nullptr,
+                                    nullptr);
+  return Sym->body();
+}
+
+// Same as addRegular, but the body becomes a DefinedSynthetic with value 0.
+// Its section comes from the assignment's expression, except when the script
+// has a SECTIONS command (HasSections), in which case it starts out null.
+template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) {
+  uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
+  const OutputSectionBase *Sec =
+      ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section();
+  Symbol *Sym = Symtab<ELFT>::X->addUndefined(
+      Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility,
+      /*Type*/ 0,
+      /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
+
+  replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec);
+  return Sym->body();
+}
+
+// Returns true if Path, or any of its parent directories, is equivalent to
+// Config->Sysroot. Always false when no sysroot is configured.
+static bool isUnderSysroot(StringRef Path) {
+  if (Config->Sysroot == "")
+    return false;
+  for (; !Path.empty(); Path = sys::path::parent_path(Path))
+    if (sys::fs::equivalent(Config->Sysroot, Path))
+      return true;
+  return false;
+}
+
+// Stores the value of Cmd's expression (evaluated with 0 as its argument)
+// into the symbol created for Cmd. Does nothing when the script has a
+// SECTIONS command.
+template <class ELFT> static void assignSymbol(SymbolAssignment *Cmd) {
+  // If there are sections, then let the value be assigned later in
+  // `assignAddresses`.
+  if (ScriptConfig->HasSections)
+    return;
+
+  uint64_t Value = Cmd->Expression(0);
+  if (Cmd->Expression.IsAbsolute()) {
+    cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Value;
+  } else {
+    // Synthetic symbols hold an offset from the start of their section.
+    const OutputSectionBase *Sec = Cmd->Expression.Section();
+    if (Sec)
+      cast<DefinedSynthetic>(Cmd->Sym)->Value = Value - Sec->Addr;
+  }
+}
+
+// Creates the symbol for a script assignment and records it in Cmd->Sym.
+// Assignments to "." define no symbol and are ignored here.
+template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) {
+  if (Cmd->Name == ".")
+    return;
+
+  // If a symbol was in PROVIDE(), we need to define it only when
+  // it is a referenced undefined symbol.
+  SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name);
+  if (Cmd->Provide && (!B || B->isDefined()))
+    return;
+
+  // Otherwise, create a new symbol if one does not exist or an
+  // undefined one does exist.
+  if (Cmd->Expression.IsAbsolute())
+    Cmd->Sym = addRegular<ELFT>(Cmd);
+  else
+    Cmd->Sym = addSynthetic<ELFT>(Cmd);
+  assignSymbol<ELFT>(Cmd);
+}
+
+// classof() hooks used by isa<>/cast<>/dyn_cast<>: each command class is
+// recognized by the Kind tag stored in BaseCommand.
+bool SymbolAssignment::classof(const BaseCommand *C) {
+  return C->Kind == AssignmentKind;
+}
+
+bool OutputSectionCommand::classof(const BaseCommand *C) {
+  return C->Kind == OutputSectionKind;
+}
+
+bool InputSectionDescription::classof(const BaseCommand *C) {
+  return C->Kind == InputSectionKind;
+}
+
+bool AssertCommand::classof(const BaseCommand *C) {
+  return C->Kind == AssertKind;
+}
+
+bool BytesDataCommand::classof(const BaseCommand *C) {
+  return C->Kind == BytesDataKind;
+}
+
+template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default;
+template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default;
+
+// Returns the filename component of the file S comes from, or "" when S has
+// no file.
+template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) {
+  if (S->getFile())
+    return sys::path::filename(S->getFile()->getName());
+  return "";
+}
+
+// Returns true if S matches both the file pattern and one of the section
+// patterns of any description recorded in Opt.KeptSections.
+template <class ELFT>
+bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
+  for (InputSectionDescription *ID : Opt.KeptSections)
+    if (ID->FilePat.match(basename(S)))
+      for (SectionPattern &P : ID->SectionPatterns)
+        if (P.SectionPat.match(S->Name))
+          return true;
+  return false;
+}
+
+// Orders sections by the priority derived from their names.
+static bool comparePriority(InputSectionData *A, InputSectionData *B) {
+  return getPriority(A->Name) < getPriority(B->Name);
+}
+
+// Orders sections by name.
+static bool compareName(InputSectionData *A, InputSectionData *B) {
+  return A->Name < B->Name;
+}
+
+// Orders sections by descending alignment.
+static bool compareAlignment(InputSectionData *A, InputSectionData *B) {
+  // ">" is not a mistake. Larger alignments are placed before smaller
+  // alignments in order to reduce the amount of padding necessary.
+  // This is compatible with GNU.
+  return A->Alignment > B->Alignment;
+}
+
+// Maps a sort policy to its comparator. Only Alignment, Name and Priority
+// are valid here; any other policy is treated as unreachable.
+static std::function<bool(InputSectionData *, InputSectionData *)>
+getComparator(SortSectionPolicy K) {
+  switch (K) {
+  case SortSectionPolicy::Alignment:
+    return compareAlignment;
+  case SortSectionPolicy::Name:
+    return compareName;
+  case SortSectionPolicy::Priority:
+    return comparePriority;
+  default:
+    llvm_unreachable("unknown sort policy");
+  }
+}
+
+// Checks an ONLY_IF_RO / ONLY_IF_RW style constraint against a list of
+// sections. The list counts as read-write if any member has SHF_WRITE set.
+template <class ELFT>
+static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections,
+                             ConstraintKind Kind) {
+  if (Kind == ConstraintKind::NoConstraint)
+    return true;
+  bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) {
+    auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2);
+    return Sec->Flags & SHF_WRITE;
+  });
+  return (IsRW && Kind == ConstraintKind::ReadWrite) ||
+         (!IsRW && Kind == ConstraintKind::ReadOnly);
+}
+
+// Stable-sorts [Begin, End) according to K. The Default and None policies
+// leave the range untouched.
+static void sortSections(InputSectionData **Begin, InputSectionData **End,
+                         SortSectionPolicy K) {
+  if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
+    std::stable_sort(Begin, End, getComparator(K));
+}
+
+// Compute and remember which sections the InputSectionDescription matches.
+template <class ELFT>
+void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) {
+  // Collects all sections that satisfy constraints of I
+  // and attach them to I.
+  for (SectionPattern &Pat : I->SectionPatterns) {
+    size_t SizeBefore = I->Sections.size();
+
+    for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) {
+      // Dead sections and sections already claimed by an earlier pattern
+      // are not considered.
+      if (!S->Live || S->Assigned)
+        continue;
+
+      StringRef Filename = basename(S);
+      if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename))
+        continue;
+      if (!Pat.SectionPat.match(S->Name))
+        continue;
+      I->Sections.push_back(S);
+      S->Assigned = true;
+    }
+
+    // Sort sections as instructed by SORT-family commands and --sort-section
+    // option. Because SORT-family commands can be nested at most two depth
+    // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command
+    // line option is respected even if a SORT command is given, the exact
+    // behavior we have here is a bit complicated. Here are the rules.
+    //
+    // 1. If two SORT commands are given, --sort-section is ignored.
+    // 2. If one SORT command is given, and if it is not SORT_NONE,
+    //    --sort-section is handled as an inner SORT command.
+    // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
+    // 4. If no SORT command is given, sort according to --sort-section.
+    //
+    // Only the sections added for this pattern are sorted. The inner sort
+    // runs first so that the stable outer sort keeps it as a tie-breaker.
+    InputSectionData **Begin = I->Sections.data() + SizeBefore;
+    InputSectionData **End = I->Sections.data() + I->Sections.size();
+    if (Pat.SortOuter != SortSectionPolicy::None) {
+      if (Pat.SortInner == SortSectionPolicy::Default)
+        sortSections(Begin, End, Config->SortSection);
+      else
+        sortSections(Begin, End, Pat.SortInner);
+      sortSections(Begin, End, Pat.SortOuter);
+    }
+  }
+}
+
+// Marks every section in V as not live and reports each one as discarded.
+template <class ELFT>
+void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) {
+  for (InputSectionBase<ELFT> *S : V) {
+    S->Live = false;
+    reportDiscarded(S);
+  }
+}
+
+// Runs computeInputSections on every input section description in OutCmd and
+// returns all matched sections, in command order.
+template <class ELFT>
+std::vector<InputSectionBase<ELFT> *>
+LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) {
+  std::vector<InputSectionBase<ELFT> *> Ret;
+
+  for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
+    auto *Cmd = dyn_cast<InputSectionDescription>(Base.get());
+    if (!Cmd)
+      continue;
+    computeInputSections(Cmd);
+    for (InputSectionData *S : Cmd->Sections)
+      Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S));
+  }
+
+  return Ret;
+}
+
+// Asks Factory for the output section for (Sec, Name), appends it to
+// OutputSections if the factory reports it as new, and adds Sec to it.
+template <class ELFT>
+void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory,
+                                    InputSectionBase<ELFT> *Sec,
+                                    StringRef Name) {
+  OutputSectionBase *OutSec;
+  bool IsNew;
+  std::tie(OutSec, IsNew) = Factory.create(Sec, Name);
+  if (IsNew)
+    OutputSections->push_back(OutSec);
+  OutSec->addSection(Sec);
+}
+
+// Walks the top-level script commands once: defines symbols for assignments,
+// evaluates ASSERT early when there is no SECTIONS command, and for each
+// output section command collects its input sections and adds them to the
+// output section of that name.
+template <class ELFT>
+void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) {
+  // An index is used instead of a range-for because commands may be erased
+  // from Opt.Commands inside the loop.
+  for (unsigned I = 0; I < Opt.Commands.size(); ++I) {
+    auto Iter = Opt.Commands.begin() + I;
+    const std::unique_ptr<BaseCommand> &Base1 = *Iter;
+
+    // Handle symbol assignments outside of any output section.
+    if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) {
+      addSymbol<ELFT>(Cmd);
+      continue;
+    }
+
+    if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) {
+      // If we don't have SECTIONS then output sections have already been
+      // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses
+      // will not be called, so ASSERT should be evaluated now.
+      if (!Opt.HasSections)
+        Cmd->Expression(0);
+      continue;
+    }
+
+    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) {
+      std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd);
+
+      // The output section name `/DISCARD/' is special.
+      // Any input section assigned to it is discarded.
+      if (Cmd->Name == "/DISCARD/") {
+        discard(V);
+        continue;
+      }
+
+      // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive
+      // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input
+      // sections satisfy a given constraint. If not, a directive is handled
+      // as if it wasn't present from the beginning.
+      //
+      // Because we'll iterate over Commands many more times, the easiest
+      // way to "make it as if it wasn't present" is to just remove it.
+      if (!matchConstraints<ELFT>(V, Cmd->Constraint)) {
+        // Release the sections so that later commands can claim them.
+        for (InputSectionBase<ELFT> *S : V)
+          S->Assigned = false;
+        Opt.Commands.erase(Iter);
+        // Revisit the same index after the erase. I is unsigned, so for
+        // I == 0 this wraps around and the loop's ++I brings it back to 0.
+        --I;
+        continue;
+      }
+
+      // A directive may contain symbol definitions like this:
+      // ".foo : { ...; bar = .; }". Handle them.
+      for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
+        if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get()))
+          addSymbol<ELFT>(OutCmd);
+
+      // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
+      // is given, input sections are aligned to that value, whether the
+      // given value is larger or smaller than the original section alignment.
+      if (Cmd->SubalignExpr) {
+        uint32_t Subalign = Cmd->SubalignExpr(0);
+        for (InputSectionBase<ELFT> *S : V)
+          S->Alignment = Subalign;
+      }
+
+      // Add input sections to an output section.
+      for (InputSectionBase<ELFT> *S : V)
+        addSection(Factory, S, Cmd->Name);
+    }
+  }
+}
+
+// Add sections that didn't match any sections command.
+template <class ELFT>
+void LinkerScript<ELFT>::addOrphanSections(
+    OutputSectionFactory<ELFT> &Factory) {
+  for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections)
+    if (S->Live && !S->OutSec)
+      addSection(Factory, S, getOutputSectionName(S->Name));
+}
+
+// Sets value of a section-defined symbol. Two kinds of
+// symbols are processed: synthetic symbols, whose value
+// is an offset from beginning of section and regular
+// symbols whose value is absolute.
+template <class ELFT>
+static void assignSectionSymbol(SymbolAssignment *Cmd,
+                                typename ELFT::uint Value) {
+  // Assignments for which no symbol was created have nothing to update.
+  if (!Cmd->Sym)
+    return;
+
+  if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) {
+    Body->Section = Cmd->Expression.Section();
+    Body->Value = Cmd->Expression(Value) - Body->Section->Addr;
+    return;
+  }
+  auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym);
+  Body->Value = Cmd->Expression(Value);
+}
+
+// Returns true for output sections that are both SHF_TLS and SHT_NOBITS.
+template <class ELFT> static bool isTbss(OutputSectionBase *Sec) {
+  return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS;
+}
+
+// Places input section S in the current output section: aligns the current
+// position, records S's offset and grows the output section. Each input
+// section is placed at most once.
+template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) {
+  if (!AlreadyOutputIS.insert(S).second)
+    return;
+  bool IsTbss = isTbss<ELFT>(CurOutSec);
+
+  // TLS NOBITS sections are positioned at Dot + ThreadBssOffset and advance
+  // ThreadBssOffset rather than Dot itself (see the end of this function).
+  uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot;
+  Pos = alignTo(Pos, S->Alignment);
+  S->OutSecOff = Pos - CurOutSec->Addr;
+  Pos += S->getSize();
+
+  // Update output section size after adding each section. This is so that
+  // SIZEOF works correctly in the case below:
+  // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
+  CurOutSec->Size = Pos - CurOutSec->Addr;
+
+  if (IsTbss)
+    ThreadBssOffset = Pos - Dot;
+  else
+    Dot = Pos;
+}
+
+// Finishes the current output section, at most once per section. For an
+// OutputSection<ELFT> every member input section is placed via output();
+// for any other kind of output section Dot just advances by its size.
+template <class ELFT> void LinkerScript<ELFT>::flush() {
+  if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second)
+    return;
+  if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) {
+    for (InputSection<ELFT> *I : OutSec->Sections)
+      output(I);
+  } else {
+    Dot += CurOutSec->Size;
+  }
+}
+
+// Makes Sec the current output section, unless it already is or has already
+// been flushed. The previous section is flushed first, then Dot is aligned
+// to Sec's alignment and Sec's address is set from it.
+template <class ELFT>
+void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) {
+  if (CurOutSec == Sec)
+    return;
+  if (AlreadyOutputOS.count(Sec))
+    return;
+
+  flush();
+  CurOutSec = Sec;
+
+  Dot = alignTo(Dot, CurOutSec->Addralign);
+  CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot;
+
+  // If neither AT nor AT> is specified for an allocatable section, the linker
+  // will set the LMA such that the difference between VMA and LMA for the
+  // section is the same as the preceding output section in the same region
+  // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
+  CurOutSec->setLMAOffset(LMAOffset);
+}
+
+// Processes one command found inside an output section description while
+// offsets are being assigned, advancing Dot and the current output section's
+// size as needed.
+template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
+  // This handles the assignments to symbol or to a location counter (.)
+  if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
+    if (AssignCmd->Name == ".") {
+      // Update to location counter means update to section size.
+      uintX_t Val = AssignCmd->Expression(Dot);
+      if (Val < Dot)
+        error("unable to move location counter backward for: " +
+              CurOutSec->Name);
+      Dot = Val;
+      CurOutSec->Size = Dot - CurOutSec->Addr;
+      return;
+    }
+    assignSectionSymbol<ELFT>(AssignCmd, Dot);
+    return;
+  }
+
+  // Handle BYTE(), SHORT(), LONG(), or QUAD().
+  if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) {
+    // Remember where in the section the data goes, then reserve room for it.
+    DataCmd->Offset = Dot - CurOutSec->Addr;
+    Dot += DataCmd->Size;
+    CurOutSec->Size = Dot - CurOutSec->Addr;
+    return;
+  }
+
+  if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) {
+    AssertCmd->Expression(Dot);
+    return;
+  }
+
+  // It handles single input section description command,
+  // calculates and assigns the offsets for each section and also
+  // updates the output section size.
+  auto &ICmd = cast<InputSectionDescription>(Base);
+  for (InputSectionData *ID : ICmd.Sections) {
+    // We tentatively added all synthetic sections at the beginning and removed
+    // empty ones afterwards (because there is no way to know whether they were
+    // going to be empty or not other than actually running linker scripts.)
+    // We need to ignore remains of empty sections.
+    if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID))
+      if (Sec->empty())
+        continue;
+
+    auto *IB = static_cast<InputSectionBase<ELFT> *>(ID);
+    switchTo(IB->OutSec);
+    // Plain input sections are placed one by one; for anything else the
+    // whole current output section is flushed.
+    if (auto *I = dyn_cast<InputSection<ELFT>>(IB))
+      output(I);
+    else
+      flush();
+  }
+}
+
+// Returns every output section in Sections whose name equals Name, in their
+// original order.
+template <class ELFT>
+static std::vector<OutputSectionBase *>
+findSections(StringRef Name, const std::vector<OutputSectionBase *> &Sections) {
+  std::vector<OutputSectionBase *> Ret;
+  for (OutputSectionBase *Sec : Sections)
+    if (Sec->getName() == Name)
+      Ret.push_back(Sec);
+  return Ret;
+}
+
+// This function assigns offsets to input sections and an output section
+// for a single sections command (e.g. ".text { *(.text); }").
+template <class ELFT>
+void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) {
+  if (Cmd->LMAExpr)
+    LMAOffset = Cmd->LMAExpr(Dot) - Dot;
+  std::vector<OutputSectionBase *> Sections =
+      findSections<ELFT>(Cmd->Name, *OutputSections);
+  if (Sections.empty())
+    return;
+  switchTo(Sections[0]);
+
+  // Find the last section output location. We will output orphan sections
+  // there so that end symbols point to the correct location.
+  //
+  // E points just past the last command that is not a symbol assignment, so
+  // [E, end) is the trailing run of assignments.
+  auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(),
+                        [](const std::unique_ptr<BaseCommand> &Cmd) {
+                          return !isa<SymbolAssignment>(*Cmd);
+                        })
+               .base();
+  for (auto I = Cmd->Commands.begin(); I != E; ++I)
+    process(**I);
+  // Emit every output section of this name that has not been emitted yet,
+  // then process the trailing assignments.
+  for (OutputSectionBase *Base : Sections)
+    switchTo(Base);
+  flush();
+  std::for_each(E, Cmd->Commands.end(),
+                [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); });
+}
+
+// Drops output section commands that have no corresponding output section.
+template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() {
+  // It is common practice to use very generic linker scripts. So for any
+  // given run some of the output sections in the script will be empty.
+  // We could create corresponding empty output sections, but that would
+  // clutter the output.
+  // We instead remove trivially empty sections. The bfd linker seems even
+  // more aggressive at removing them.
+  auto Pos = std::remove_if(
+      Opt.Commands.begin(), Opt.Commands.end(),
+      [&](const std::unique_ptr<BaseCommand> &Base) {
+        if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
+          return findSections<ELFT>(Cmd->Name, *OutputSections).empty();
+        return false;
+      });
+  Opt.Commands.erase(Pos, Opt.Commands.end());
+}
+
+// Returns true if every sub-command of Cmd is an input section description.
+// This is also true for a command with no sub-commands at all.
+static bool isAllSectionDescription(const OutputSectionCommand &Cmd) {
+  for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands)
+    if (!isa<InputSectionDescription>(*I))
+      return false;
+  return true;
+}
+
+template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() {
+  // If the output section contains only symbol assignments, create a
+  // corresponding output section. The bfd linker seems to only create them if
+  // '.' is assigned to, but creating these sections should not have any bad
+  // consequences and gives us a section to put the symbol in.
+  //
+  // A section created here gets the flags and type of the closest preceding
+  // command that has an output section, or SHF_ALLOC / SHT_NOBITS if there is
+  // none.
+  uintX_t Flags = SHF_ALLOC;
+  uint32_t Type = SHT_NOBITS;
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
+    if (!Cmd)
+      continue;
+    std::vector<OutputSectionBase *> Secs =
+        findSections<ELFT>(Cmd->Name, *OutputSections);
+    if (!Secs.empty()) {
+      Flags = Secs[0]->Flags;
+      Type = Secs[0]->Type;
+      continue;
+    }
+
+    // Commands consisting only of input section descriptions that matched
+    // nothing do not get a section.
+    if (isAllSectionDescription(*Cmd))
+      continue;
+
+    auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags);
+    OutputSections->push_back(OutSec);
+  }
+}
+
+template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() {
+  placeOrphanSections();
+
+  // If output section command doesn't specify any segments,
+  // and we haven't previously assigned any section to segment,
+  // then we simply assign section to the very first load segment.
+  // Below is an example of such linker script:
+  // PHDRS { seg PT_LOAD; }
+  // SECTIONS { .aaa : { *(.aaa) } }
+  std::vector<StringRef> DefPhdrs;
+  auto FirstPtLoad =
+      std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(),
+                   [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; });
+  if (FirstPtLoad != Opt.PhdrsCommands.end())
+    DefPhdrs.push_back(FirstPtLoad->Name);
+
+  // Walk the commands and propagate the program headers to commands that don't
+  // explicitly specify them.
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
+    if (!Cmd)
+      continue;
+    if (Cmd->Phdrs.empty())
+      Cmd->Phdrs = DefPhdrs;
+    else
+      DefPhdrs = Cmd->Phdrs;
+  }
+
+  removeEmptyCommands();
+}
+
+// When placing orphan sections, we want to place them after symbol assignments
+// so that an orphan after
+//   begin_foo = .;
+//   foo : { *(foo) }
+//   end_foo = .;
+// doesn't break the intended meaning of the begin/end symbols.
+// We don't want to go over sections since Writer<ELFT>::sortSections is the
+// one in charge of deciding the order of the sections.
+// We don't want to go over alignments, since doing so in
+//   rx_sec : { *(rx_sec) }
+//   . = ALIGN(0x1000);
+//   /* The RW PT_LOAD starts here*/
+//   rw_sec : { *(rw_sec) }
+// would mean that the RW PT_LOAD would become unaligned.
+static bool shouldSkip(const BaseCommand &Cmd) {
+  // Output section commands and assignments to "." stop the scan. Everything
+  // else, including assignments to ordinary symbols, may be skipped over.
+  if (isa<OutputSectionCommand>(Cmd))
+    return false;
+  const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd);
+  if (!Assign)
+    return true;
+  return Assign->Name != ".";
+}
+
+// Orphan sections are sections present in the input files which are not
+// explicitly placed into the output file by the linker script. This just
+// places them in the order already decided in OutputSections.
+template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() {
+  // The OutputSections are already in the correct order.
+  // This loop creates or moves commands as needed so that they are in the
+  // correct order.
+  int CmdIndex = 0;
+
+  // As a horrible special case, skip the first . assignment if it is before any
+  // section. We do this because it is common to set a load address by starting
+  // the script with ". = 0xabcd" and the expectation is that every section is
+  // after that.
+  auto FirstSectionOrDotAssignment =
+      std::find_if(Opt.Commands.begin(), Opt.Commands.end(),
+                   [](const std::unique_ptr<BaseCommand> &Cmd) {
+                     if (isa<OutputSectionCommand>(*Cmd))
+                       return true;
+                     const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get());
+                     if (!Assign)
+                       return false;
+                     return Assign->Name == ".";
+                   });
+  if (FirstSectionOrDotAssignment != Opt.Commands.end()) {
+    CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin();
+    if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment))
+      ++CmdIndex;
+  }
+
+  for (OutputSectionBase *Sec : *OutputSections) {
+    StringRef Name = Sec->getName();
+
+    // Find the last spot where we can insert a command and still get the
+    // correct result.
+    auto CmdIter = Opt.Commands.begin() + CmdIndex;
+    auto E = Opt.Commands.end();
+    while (CmdIter != E && shouldSkip(**CmdIter)) {
+      ++CmdIter;
+      ++CmdIndex;
+    }
+
+    auto Pos =
+        std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) {
+          auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
+          return Cmd && Cmd->Name == Name;
+        });
+    if (Pos == E) {
+      // No command names this section: it is an orphan, so synthesize a
+      // command for it at the insertion point. The insert invalidates
+      // CmdIter, which is why the position is tracked as an index.
+      Opt.Commands.insert(CmdIter,
+                          llvm::make_unique<OutputSectionCommand>(Name));
+      ++CmdIndex;
+      continue;
+    }
+
+    // Continue from where we found it.
+    CmdIndex = (Pos - Opt.Commands.begin()) + 1;
+  }
+}
+
+// Evaluates the top-level script commands in order to assign addresses to
+// output sections and symbols, then positions the ELF header and the program
+// headers below the lowest allocated section.
+template <class ELFT>
+void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) {
+  // Assign addresses as instructed by linker script SECTIONS sub-commands.
+  Dot = 0;
+
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) {
+      if (Cmd->Name == ".") {
+        Dot = Cmd->Expression(Dot);
+      } else if (Cmd->Sym) {
+        assignSectionSymbol<ELFT>(Cmd, Dot);
+      }
+      continue;
+    }
+
+    if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) {
+      Cmd->Expression(Dot);
+      continue;
+    }
+
+    // Anything that is neither an assignment nor an ASSERT must be an output
+    // section command at this level; cast<> asserts that.
+    auto *Cmd = cast<OutputSectionCommand>(Base.get());
+    if (Cmd->AddrExpr)
+      Dot = Cmd->AddrExpr(Dot);
+    assignOffsets(Cmd);
+  }
+
+  // Find the lowest address of any allocated section. Sections that are not
+  // allocated get address 0.
+  uintX_t MinVA = std::numeric_limits<uintX_t>::max();
+  for (OutputSectionBase *Sec : *OutputSections) {
+    if (Sec->Flags & SHF_ALLOC)
+      MinVA = std::min<uint64_t>(MinVA, Sec->Addr);
+    else
+      Sec->Addr = 0;
+  }
+
+  uintX_t HeaderSize = getHeaderSize();
+  // If the linker script doesn't have PHDRS, add ElfHeader and ProgramHeaders
+  // now that we know we have space.
+  if (HeaderSize <= MinVA && !hasPhdrsCommands())
+    allocateHeaders<ELFT>(Phdrs, *OutputSections);
+
+  // ELF and Program headers need to be right before the first section in
+  // memory. Set their addresses accordingly.
+  // NOTE(review): this runs even when HeaderSize > MinVA, in which case the
+  // subtraction presumably wraps around (uintX_t looks unsigned) -- confirm
+  // that the header addresses are not used in that situation.
+  MinVA = alignDown(MinVA - HeaderSize, Config->MaxPageSize);
+  Out<ELFT>::ElfHeader->Addr = MinVA;
+  Out<ELFT>::ProgramHeaders->Addr = Out<ELFT>::ElfHeader->Size + MinVA;
+}
+
+// Creates program headers as instructed by PHDRS linker script command.
+template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() {
+  std::vector<PhdrEntry> Ret;
+
+  // Process PHDRS and FILEHDR keywords because they are not
+  // real output sections and cannot be added in the following loop.
+  for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) {
+    // Flags == UINT_MAX marks a header without explicit flags. It starts as
+    // PF_R and picks up the flags of its sections in the loop below.
+    Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags);
+    PhdrEntry &Phdr = Ret.back();
+
+    if (Cmd.HasFilehdr)
+      Phdr.add(Out<ELFT>::ElfHeader);
+    if (Cmd.HasPhdrs)
+      Phdr.add(Out<ELFT>::ProgramHeaders);
+
+    if (Cmd.LMAExpr) {
+      Phdr.p_paddr = Cmd.LMAExpr(0);
+      Phdr.HasLMA = true;
+    }
+  }
+
+  // Add output sections to program headers.
+  for (OutputSectionBase *Sec : *OutputSections) {
+    // Stops at the first non-allocated section. Presumably this relies on
+    // allocated sections being ordered first -- verify against the writer.
+    if (!(Sec->Flags & SHF_ALLOC))
+      break;
+
+    // Assign headers specified by linker script
+    for (size_t Id : getPhdrIndices(Sec->getName())) {
+      Ret[Id].add(Sec);
+      if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
+        Ret[Id].p_flags |= Sec->getPhdrFlags();
+    }
+  }
+  return Ret;
+}
+
+template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() {
+  // Ignore .interp section in case we have PHDRS specification
+  // and PT_INTERP isn't listed.
+  return !Opt.PhdrsCommands.empty() &&
+         llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) {
+           return Cmd.Type == PT_INTERP;
+         }) == Opt.PhdrsCommands.end();
+}
+
+// Returns the Filler of the first output section command named Name, or 0 if
+// the script has no such command.
+template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) {
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
+    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
+      if (Cmd->Name == Name)
+        return Cmd->Filler;
+  return 0;
+}
+
+// Stores Data at Buf as a Size-byte integer in the target's byte order.
+// Size must be 1, 2, 4 or 8; any other value is treated as unreachable.
+template <class ELFT>
+static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) {
+  const endianness E = ELFT::TargetEndianness;
+
+  switch (Size) {
+  case 1:
+    *Buf = (uint8_t)Data;
+    break;
+  case 2:
+    write16<E>(Buf, Data);
+    break;
+  case 4:
+    write32<E>(Buf, Data);
+    break;
+  case 8:
+    write64<E>(Buf, Data);
+    break;
+  default:
+    llvm_unreachable("unsupported Size argument");
+  }
+}
+
+// Writes the BYTE()/SHORT()/LONG()/QUAD() data of the output section command
+// named Name into Buf, each at the offset recorded in the data command.
+// Does nothing if the script has no command for that section.
+template <class ELFT>
+void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) {
+  int I = getSectionIndex(Name);
+  if (I == INT_MAX)
+    return;
+
+  // getSectionIndex only returns indices of output section commands, so the
+  // type is known here. cast<> asserts it instead of silently yielding a null
+  // pointer that would then be dereferenced.
+  auto *Cmd = cast<OutputSectionCommand>(Opt.Commands[I].get());
+  for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
+    if (auto *Data = dyn_cast<BytesDataCommand>(Base.get()))
+      writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size);
+}
+
+// Returns true if some output section command named Name has an LMA
+// expression.
+template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) {
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
+    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
+      if (Cmd->LMAExpr && Cmd->Name == Name)
+        return true;
+  return false;
+}
+
+// Returns the index of the given section name in linker script
+// SECTIONS commands. Sections are laid out in the same order as they
+// were in the script. If a given name did not appear in the script,
+// it returns INT_MAX, so that it will be laid out at end of file.
+template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) {
+  // Index counts every top-level command, not only output section commands,
+  // so the result can be used directly as a position in Opt.Commands.
+  int Index = 0;
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    auto *OutCmd = dyn_cast<OutputSectionCommand>(Base.get());
+    if (OutCmd && OutCmd->Name == Name)
+      return Index;
+    ++Index;
+  }
+  return INT_MAX;
+}
+
+// Tells whether the script contains at least one PHDRS entry.
+template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() {
+  const bool NoPhdrs = Opt.PhdrsCommands.empty();
+  return !NoPhdrs;
+}
+
+// Looks up the first output section called Name. If there is none, an error
+// mentioning Loc is reported and a placeholder section is returned, so the
+// result is never null.
+template <class ELFT>
+const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc,
+                                                              StringRef Name) {
+  static OutputSectionBase FakeSec("", 0, 0);
+
+  auto Found =
+      llvm::find_if(*OutputSections, [&](OutputSectionBase *Candidate) {
+        return Candidate->getName() == Name;
+      });
+  if (Found != OutputSections->end())
+    return *Found;
+
+  error(Loc + ": undefined section " + Name);
+  return &FakeSec;
+}
+
+// This function is essentially the same as getOutputSection(Name)->Size,
+// but it won't print out an error message if a given section is not found.
+//
+// Linker script does not create an output section if its content is empty.
+// We want to allow SIZEOF(.foo) where .foo is a section which happened to
+// be empty. That is why this function is different from getOutputSection().
+template <class ELFT>
+uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) {
+  auto Match = llvm::find_if(*OutputSections, [&](OutputSectionBase *Candidate) {
+    return Candidate->getName() == Name;
+  });
+  // A section that does not exist is reported as having size zero.
+  if (Match == OutputSections->end())
+    return 0;
+  return (*Match)->Size;
+}
+
+// Forwards to the free function elf::getHeaderSize for this ELF type.
+template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() {
+  return elf::getHeaderSize<ELFT>();
+}
+
+// Returns the virtual address of symbol S. If the symbol table has no such
+// symbol, an error mentioning Loc is reported and 0 is returned.
+template <class ELFT>
+uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) {
+  SymbolBody *Body = Symtab<ELFT>::X->find(S);
+  if (!Body) {
+    error(Loc + ": symbol not found: " + S);
+    return 0;
+  }
+  return Body->getVA<ELFT>();
+}
+
+// Tells whether the symbol table has an entry for S.
+template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) {
+  SymbolBody *Body = Symtab<ELFT>::X->find(S);
+  return Body != nullptr;
+}
+
+// A symbol is absolute if it is a DefinedRegular without a section. Unknown
+// symbols and symbols of any other kind are not absolute.
+template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) {
+  auto *Regular =
+      dyn_cast_or_null<DefinedRegular<ELFT>>(Symtab<ELFT>::X->find(S));
+  if (!Regular)
+    return false;
+  return !Regular->Section;
+}
+
+// Returns the output section a symbol belongs to. Symbol "." doesn't belong
+// to any specific section but isn't absolute at the same time, so we try
+// to find a suitable section for it as well.
+template <class ELFT>
+const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) {
+  SymbolBody *Sym = Symtab<ELFT>::X->find(S);
+  if (Sym) {
+    // Sym is known to be non-null here, so plain dyn_cast is sufficient.
+    if (auto *Regular = dyn_cast<DefinedRegular<ELFT>>(Sym)) {
+      if (!Regular->Section)
+        return nullptr;
+      return Regular->Section->OutSec;
+    }
+    if (auto *Synthetic = dyn_cast<DefinedSynthetic>(Sym))
+      return Synthetic->Section;
+    return nullptr;
+  }
+
+  // Name not in the symbol table: fall back to the current output section,
+  // or to the first one if there is no current section.
+  if (OutputSections->empty())
+    return nullptr;
+  if (CurOutSec)
+    return CurOutSec;
+  return OutputSections->front();
+}
+
+// Returns the indices of the program headers that the section with the
+// given name is assigned to. Each index is the zero-based position of a
+// header within the PHDRS {} script block.
+template <class ELFT>
+std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) {
+  // Only the first output section command with a matching name is consulted.
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
+    if (!Cmd || Cmd->Name != SectionName)
+      continue;
+
+    std::vector<size_t> Ret;
+    for (StringRef PhdrName : Cmd->Phdrs)
+      Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName));
+    return Ret;
+  }
+  return {};
+}
+
+// Returns the zero-based position of the PHDRS entry named PhdrName. If there
+// is no such entry, an error mentioning Loc is reported and 0 is returned.
+template <class ELFT>
+size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) {
+  size_t I = 0;
+  for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
+    if (Cmd.Name == PhdrName)
+      return I;
+    ++I;
+  }
+  error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS");
+  return 0;
+}
+
+// Parser for linker scripts, version scripts and dynamic lists. The three
+// public read* methods are the entry points; results are stored through Opt,
+// which refers to the global ScriptConfig.
+class elf::ScriptParser final : public ScriptParserBase {
+  typedef void (ScriptParser::*Handler)();
+
+public:
+  ScriptParser(MemoryBufferRef MB)
+      : ScriptParserBase(MB),
+        IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}
+
+  void readLinkerScript();
+  void readVersionScript();
+  void readDynamicList();
+
+private:
+  void addFile(StringRef Path);
+
+  void readAsNeeded();
+  void readEntry();
+  void readExtern();
+  void readGroup();
+  void readInclude();
+  void readOutput();
+  void readOutputArch();
+  void readOutputFormat();
+  void readPhdrs();
+  void readSearchDir();
+  void readSections();
+  void readVersion();
+  void readVersionScriptCommand();
+
+  SymbolAssignment *readAssignment(StringRef Name);
+  BytesDataCommand *readBytesDataCommand(StringRef Tok);
+  uint32_t readFill();
+  OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
+  uint32_t readOutputSectionFiller(StringRef Tok);
+  std::vector<StringRef> readOutputSectionPhdrs();
+  InputSectionDescription *readInputSectionDescription(StringRef Tok);
+  StringMatcher readFilePatterns();
+  std::vector<SectionPattern> readInputSectionsList();
+  InputSectionDescription *readInputSectionRules(StringRef FilePattern);
+  unsigned readPhdrType();
+  SortSectionPolicy readSortKind();
+  SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
+  SymbolAssignment *readProvideOrAssignment(StringRef Tok);
+  void readSort();
+  Expr readAssert();
+
+  Expr readExpr();
+  Expr readExpr1(Expr Lhs, int MinPrec);
+  StringRef readParenLiteral();
+  Expr readPrimary();
+  Expr readTernary(Expr Cond);
+  Expr readParenExpr();
+
+  // For parsing version script.
+  std::vector<SymbolVersion> readVersionExtern();
+  void readAnonymousDeclaration();
+  void readVersionDeclaration(StringRef VerStr);
+  std::vector<SymbolVersion> readSymbols();
+
+  ScriptConfiguration &Opt = *ScriptConfig;
+  // True if the script being parsed is located under Config->Sysroot.
+  bool IsUnderSysroot;
+};
+
+// Parses a dynamic list: one anonymous "{ ... }" declaration that must be
+// followed by end of input.
+void ScriptParser::readDynamicList() {
+  expect("{");
+  readAnonymousDeclaration();
+  if (!atEOF())
+    setError("EOF expected, but got " + next());
+}
+
+// Parses a whole version script and checks that nothing follows it.
+void ScriptParser::readVersionScript() {
+  readVersionScriptCommand();
+  if (!atEOF())
+    setError("EOF expected, but got " + next());
+}
+
+// Parses either one anonymous version declaration or a sequence of named
+// ones. Mixing the two forms is an error.
+void ScriptParser::readVersionScriptCommand() {
+  if (consume("{")) {
+    readAnonymousDeclaration();
+    return;
+  }
+
+  while (!atEOF() && !Error && peek() != "}") {
+    StringRef VerStr = next();
+    if (VerStr == "{") {
+      setError("anonymous version definition is used in "
+               "combination with other version definitions");
+      return;
+    }
+    expect("{");
+    readVersionDeclaration(VerStr);
+  }
+}
+
+// Parses the body of a VERSION { ... } linker script command.
+void ScriptParser::readVersion() {
+  expect("{");
+  readVersionScriptCommand();
+  expect("}");
+}
+
+// Top-level loop of the linker script parser. Dispatches on the leading
+// keyword of each command; a token that is not a known keyword is tried as a
+// PROVIDE/assignment and otherwise reported as an unknown directive.
+void ScriptParser::readLinkerScript() {
+  while (!atEOF()) {
+    StringRef Tok = next();
+    if (Tok == ";")
+      continue;
+
+    if (Tok == "ASSERT") {
+      Opt.Commands.emplace_back(new AssertCommand(readAssert()));
+    } else if (Tok == "ENTRY") {
+      readEntry();
+    } else if (Tok == "EXTERN") {
+      readExtern();
+    } else if (Tok == "GROUP" || Tok == "INPUT") {
+      readGroup();
+    } else if (Tok == "INCLUDE") {
+      readInclude();
+    } else if (Tok == "OUTPUT") {
+      readOutput();
+    } else if (Tok == "OUTPUT_ARCH") {
+      readOutputArch();
+    } else if (Tok == "OUTPUT_FORMAT") {
+      readOutputFormat();
+    } else if (Tok == "PHDRS") {
+      readPhdrs();
+    } else if (Tok == "SEARCH_DIR") {
+      readSearchDir();
+    } else if (Tok == "SECTIONS") {
+      readSections();
+    } else if (Tok == "VERSION") {
+      readVersion();
+    } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
+      Opt.Commands.emplace_back(Cmd);
+    } else {
+      setError("unknown directive: " + Tok);
+    }
+  }
+}
+
+// Resolves a file name that appears in the script and hands it to the driver.
+void ScriptParser::addFile(StringRef S) {
+  // For a script located under the sysroot, a path starting with "/" is
+  // first tried relative to the sysroot.
+  if (IsUnderSysroot && S.startswith("/")) {
+    SmallString<128> PathData;
+    StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
+    if (sys::fs::exists(Path)) {
+      Driver->addFile(Saver.save(Path));
+      return;
+    }
+  }
+
+  if (sys::path::is_absolute(S)) {
+    Driver->addFile(S);
+  } else if (S.startswith("=")) {
+    // A leading "=" is replaced by the sysroot, if one is configured.
+    if (Config->Sysroot.empty())
+      Driver->addFile(S.substr(1));
+    else
+      Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
+  } else if (S.startswith("-l")) {
+    Driver->addLibrary(S.substr(2));
+  } else if (sys::fs::exists(S)) {
+    Driver->addFile(S);
+  } else {
+    if (Optional<std::string> Path = findFromSearchPaths(S))
+      Driver->addFile(Saver.save(*Path));
+    else
+      setError("unable to find " + S);
+  }
+}
+
+// Parses the "( file... )" list of AS_NEEDED. Config->AsNeeded is forced to
+// true while the listed files are added and restored afterwards.
+void ScriptParser::readAsNeeded() {
+  expect("(");
+  bool Orig = Config->AsNeeded;
+  Config->AsNeeded = true;
+  while (!Error && !consume(")"))
+    addFile(unquote(next()));
+  Config->AsNeeded = Orig;
+}
+
+void ScriptParser::readEntry() {
+  // -e <symbol> takes precedence over ENTRY(<symbol>).
+ expect("("); + StringRef Tok = next(); + if (Config->Entry.empty()) + Config->Entry = Tok; + expect(")"); +} + +void ScriptParser::readExtern() { + expect("("); + while (!Error && !consume(")")) + Config->Undefined.push_back(next()); +} + +void ScriptParser::readGroup() { + expect("("); + while (!Error && !consume(")")) { + StringRef Tok = next(); + if (Tok == "AS_NEEDED") + readAsNeeded(); + else + addFile(unquote(Tok)); + } +} + +void ScriptParser::readInclude() { + StringRef Tok = unquote(next()); + + // https://sourceware.org/binutils/docs/ld/File-Commands.html: + // The file will be searched for in the current directory, and in any + // directory specified with the -L option. + if (sys::fs::exists(Tok)) { + if (Optional<MemoryBufferRef> MB = readFile(Tok)) + tokenize(*MB); + return; + } + if (Optional<std::string> Path = findFromSearchPaths(Tok)) { + if (Optional<MemoryBufferRef> MB = readFile(*Path)) + tokenize(*MB); + return; + } + setError("cannot open " + Tok); +} + +void ScriptParser::readOutput() { + // -o <file> takes predecence over OUTPUT(<file>). + expect("("); + StringRef Tok = next(); + if (Config->OutputFile.empty()) + Config->OutputFile = unquote(Tok); + expect(")"); +} + +void ScriptParser::readOutputArch() { + // Error checking only for now. + expect("("); + skip(); + expect(")"); +} + +void ScriptParser::readOutputFormat() { + // Error checking only for now. 
+ expect("("); + skip(); + StringRef Tok = next(); + if (Tok == ")") + return; + if (Tok != ",") { + setError("unexpected token: " + Tok); + return; + } + skip(); + expect(","); + skip(); + expect(")"); +} + +void ScriptParser::readPhdrs() { + expect("{"); + while (!Error && !consume("}")) { + StringRef Tok = next(); + Opt.PhdrsCommands.push_back( + {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); + PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); + + PhdrCmd.Type = readPhdrType(); + do { + Tok = next(); + if (Tok == ";") + break; + if (Tok == "FILEHDR") + PhdrCmd.HasFilehdr = true; + else if (Tok == "PHDRS") + PhdrCmd.HasPhdrs = true; + else if (Tok == "AT") + PhdrCmd.LMAExpr = readParenExpr(); + else if (Tok == "FLAGS") { + expect("("); + // Passing 0 for the value of dot is a bit of a hack. It means that + // we accept expressions like ".|1". + PhdrCmd.Flags = readExpr()(0); + expect(")"); + } else + setError("unexpected header attribute: " + Tok); + } while (!Error); + } +} + +void ScriptParser::readSearchDir() { + expect("("); + StringRef Tok = next(); + if (!Config->Nostdlib) + Config->SearchPaths.push_back(unquote(Tok)); + expect(")"); +} + +void ScriptParser::readSections() { + Opt.HasSections = true; + // -no-rosegment is used to avoid placing read only non-executable sections in + // their own segment. We do the same if SECTIONS command is present in linker + // script. See comment for computeFlags(). 
+ Config->SingleRoRx = true; + + expect("{"); + while (!Error && !consume("}")) { + StringRef Tok = next(); + BaseCommand *Cmd = readProvideOrAssignment(Tok); + if (!Cmd) { + if (Tok == "ASSERT") + Cmd = new AssertCommand(readAssert()); + else + Cmd = readOutputSectionDescription(Tok); + } + Opt.Commands.emplace_back(Cmd); + } +} + +static int precedence(StringRef Op) { + return StringSwitch<int>(Op) + .Cases("*", "/", 5) + .Cases("+", "-", 4) + .Cases("<<", ">>", 3) + .Cases("<", "<=", ">", ">=", "==", "!=", 2) + .Cases("&", "|", 1) + .Default(-1); +} + +StringMatcher ScriptParser::readFilePatterns() { + std::vector<StringRef> V; + while (!Error && !consume(")")) + V.push_back(next()); + return StringMatcher(V); +} + +SortSectionPolicy ScriptParser::readSortKind() { + if (consume("SORT") || consume("SORT_BY_NAME")) + return SortSectionPolicy::Name; + if (consume("SORT_BY_ALIGNMENT")) + return SortSectionPolicy::Alignment; + if (consume("SORT_BY_INIT_PRIORITY")) + return SortSectionPolicy::Priority; + if (consume("SORT_NONE")) + return SortSectionPolicy::None; + return SortSectionPolicy::Default; +} + +// Method reads a list of sequence of excluded files and section globs given in +// a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ +// Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) +// The semantics of that is next: +// * Include .foo.1 from every file. 
+// * Include .foo.2 from every file but a.o +// * Include .foo.3 from every file but b.o +std::vector<SectionPattern> ScriptParser::readInputSectionsList() { + std::vector<SectionPattern> Ret; + while (!Error && peek() != ")") { + StringMatcher ExcludeFilePat; + if (consume("EXCLUDE_FILE")) { + expect("("); + ExcludeFilePat = readFilePatterns(); + } + + std::vector<StringRef> V; + while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") + V.push_back(next()); + + if (!V.empty()) + Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); + else + setError("section pattern is expected"); + } + return Ret; +} + +// Reads contents of "SECTIONS" directive. That directive contains a +// list of glob patterns for input sections. The grammar is as follows. +// +// <patterns> ::= <section-list> +// | <sort> "(" <section-list> ")" +// | <sort> "(" <sort> "(" <section-list> ")" ")" +// +// <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" +// | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" +// +// <section-list> is parsed by readInputSectionsList(). 
+InputSectionDescription * +ScriptParser::readInputSectionRules(StringRef FilePattern) { + auto *Cmd = new InputSectionDescription(FilePattern); + expect("("); + while (!Error && !consume(")")) { + SortSectionPolicy Outer = readSortKind(); + SortSectionPolicy Inner = SortSectionPolicy::Default; + std::vector<SectionPattern> V; + if (Outer != SortSectionPolicy::Default) { + expect("("); + Inner = readSortKind(); + if (Inner != SortSectionPolicy::Default) { + expect("("); + V = readInputSectionsList(); + expect(")"); + } else { + V = readInputSectionsList(); + } + expect(")"); + } else { + V = readInputSectionsList(); + } + + for (SectionPattern &Pat : V) { + Pat.SortInner = Inner; + Pat.SortOuter = Outer; + } + + std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); + } + return Cmd; +} + +InputSectionDescription * +ScriptParser::readInputSectionDescription(StringRef Tok) { + // Input section wildcard can be surrounded by KEEP. + // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep + if (Tok == "KEEP") { + expect("("); + StringRef FilePattern = next(); + InputSectionDescription *Cmd = readInputSectionRules(FilePattern); + expect(")"); + Opt.KeptSections.push_back(Cmd); + return Cmd; + } + return readInputSectionRules(Tok); +} + +void ScriptParser::readSort() { + expect("("); + expect("CONSTRUCTORS"); + expect(")"); +} + +Expr ScriptParser::readAssert() { + expect("("); + Expr E = readExpr(); + expect(","); + StringRef Msg = unquote(next()); + expect(")"); + return [=](uint64_t Dot) { + uint64_t V = E(Dot); + if (!V) + error(Msg); + return V; + }; +} + +// Reads a FILL(expr) command. We handle the FILL command as an +// alias for =fillexp section attribute, which is different from +// what GNU linkers do. 
+// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html +uint32_t ScriptParser::readFill() { + expect("("); + uint32_t V = readOutputSectionFiller(next()); + expect(")"); + expect(";"); + return V; +} + +OutputSectionCommand * +ScriptParser::readOutputSectionDescription(StringRef OutSec) { + OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); + Cmd->Location = getCurrentLocation(); + + // Read an address expression. + // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address + if (peek() != ":") + Cmd->AddrExpr = readExpr(); + + expect(":"); + + if (consume("AT")) + Cmd->LMAExpr = readParenExpr(); + if (consume("ALIGN")) + Cmd->AlignExpr = readParenExpr(); + if (consume("SUBALIGN")) + Cmd->SubalignExpr = readParenExpr(); + + // Parse constraints. + if (consume("ONLY_IF_RO")) + Cmd->Constraint = ConstraintKind::ReadOnly; + if (consume("ONLY_IF_RW")) + Cmd->Constraint = ConstraintKind::ReadWrite; + expect("{"); + + while (!Error && !consume("}")) { + StringRef Tok = next(); + if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { + Cmd->Commands.emplace_back(Assignment); + } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { + Cmd->Commands.emplace_back(Data); + } else if (Tok == "ASSERT") { + Cmd->Commands.emplace_back(new AssertCommand(readAssert())); + expect(";"); + } else if (Tok == "FILL") { + Cmd->Filler = readFill(); + } else if (Tok == "SORT") { + readSort(); + } else if (peek() == "(") { + Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); + } else { + setError("unknown command " + Tok); + } + } + Cmd->Phdrs = readOutputSectionPhdrs(); + + if (consume("=")) + Cmd->Filler = readOutputSectionFiller(next()); + else if (peek().startswith("=")) + Cmd->Filler = readOutputSectionFiller(next().drop_front()); + + return Cmd; +} + +// Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 
+// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html +// +// ld.gold is not fully compatible with ld.bfd. ld.bfd handles +// hexstrings as blobs of arbitrary sizes, while ld.gold handles them +// as 32-bit big-endian values. We will do the same as ld.gold does +// because it's simpler than what ld.bfd does. +uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { + uint32_t V; + if (!Tok.getAsInteger(0, V)) + return V; + setError("invalid filler expression: " + Tok); + return 0; +} + +SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { + expect("("); + SymbolAssignment *Cmd = readAssignment(next()); + Cmd->Provide = Provide; + Cmd->Hidden = Hidden; + expect(")"); + expect(";"); + return Cmd; +} + +SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { + SymbolAssignment *Cmd = nullptr; + if (peek() == "=" || peek() == "+=") { + Cmd = readAssignment(Tok); + expect(";"); + } else if (Tok == "PROVIDE") { + Cmd = readProvideHidden(true, false); + } else if (Tok == "HIDDEN") { + Cmd = readProvideHidden(false, true); + } else if (Tok == "PROVIDE_HIDDEN") { + Cmd = readProvideHidden(true, true); + } + return Cmd; +} + +static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { + if (S == ".") + return Dot; + return ScriptBase->getSymbolValue(Loc, S); +} + +static bool isAbsolute(StringRef S) { + if (S == ".") + return false; + return ScriptBase->isAbsolute(S); +} + +SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { + StringRef Op = next(); + Expr E; + assert(Op == "=" || Op == "+="); + if (consume("ABSOLUTE")) { + // The RHS may be something like "ABSOLUTE(.) & 0xff". + // Call readExpr1 to read the whole expression. 
+ E = readExpr1(readParenExpr(), 0); + E.IsAbsolute = [] { return true; }; + } else { + E = readExpr(); + } + if (Op == "+=") { + std::string Loc = getCurrentLocation(); + E = [=](uint64_t Dot) { + return getSymbolValue(Loc, Name, Dot) + E(Dot); + }; + } + return new SymbolAssignment(Name, E); +} + +// This is an operator-precedence parser to parse a linker +// script expression. +Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } + +static Expr combine(StringRef Op, Expr L, Expr R) { + if (Op == "*") + return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; + if (Op == "/") { + return [=](uint64_t Dot) -> uint64_t { + uint64_t RHS = R(Dot); + if (RHS == 0) { + error("division by zero"); + return 0; + } + return L(Dot) / RHS; + }; + } + if (Op == "+") + return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, + [=] { return L.IsAbsolute() && R.IsAbsolute(); }, + [=] { + const OutputSectionBase *S = L.Section(); + return S ? S : R.Section(); + }}; + if (Op == "-") + return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; + if (Op == "<<") + return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; + if (Op == ">>") + return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; + if (Op == "<") + return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; + if (Op == ">") + return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; + if (Op == ">=") + return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; + if (Op == "<=") + return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; + if (Op == "==") + return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; + if (Op == "!=") + return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; + if (Op == "&") + return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; + if (Op == "|") + return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; + llvm_unreachable("invalid operator"); +} + +// This is a part of the operator-precedence parser. This function +// assumes that the remaining token stream starts with an operator. 
+Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { + while (!atEOF() && !Error) { + // Read an operator and an expression. + if (consume("?")) + return readTernary(Lhs); + StringRef Op1 = peek(); + if (precedence(Op1) < MinPrec) + break; + skip(); + Expr Rhs = readPrimary(); + + // Evaluate the remaining part of the expression first if the + // next operator has greater precedence than the previous one. + // For example, if we have read "+" and "3", and if the next + // operator is "*", then we'll evaluate 3 * ... part first. + while (!atEOF()) { + StringRef Op2 = peek(); + if (precedence(Op2) <= precedence(Op1)) + break; + Rhs = readExpr1(Rhs, precedence(Op2)); + } + + Lhs = combine(Op1, Lhs, Rhs); + } + return Lhs; +} + +uint64_t static getConstant(StringRef S) { + if (S == "COMMONPAGESIZE") + return Target->PageSize; + if (S == "MAXPAGESIZE") + return Config->MaxPageSize; + error("unknown constant: " + S); + return 0; +} + +// Parses Tok as an integer. Returns true if successful. +// It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") +// and decimal numbers. Decimal numbers may have "K" (kilo) or +// "M" (mega) prefixes. 
+static bool readInteger(StringRef Tok, uint64_t &Result) { + // Negative number + if (Tok.startswith("-")) { + if (!readInteger(Tok.substr(1), Result)) + return false; + Result = -Result; + return true; + } + + // Hexadecimal + if (Tok.startswith_lower("0x")) + return !Tok.substr(2).getAsInteger(16, Result); + if (Tok.endswith_lower("H")) + return !Tok.drop_back().getAsInteger(16, Result); + + // Decimal + int Suffix = 1; + if (Tok.endswith_lower("K")) { + Suffix = 1024; + Tok = Tok.drop_back(); + } else if (Tok.endswith_lower("M")) { + Suffix = 1024 * 1024; + Tok = Tok.drop_back(); + } + if (Tok.getAsInteger(10, Result)) + return false; + Result *= Suffix; + return true; +} + +BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { + int Size = StringSwitch<unsigned>(Tok) + .Case("BYTE", 1) + .Case("SHORT", 2) + .Case("LONG", 4) + .Case("QUAD", 8) + .Default(-1); + if (Size == -1) + return nullptr; + + return new BytesDataCommand(readParenExpr(), Size); +} + +StringRef ScriptParser::readParenLiteral() { + expect("("); + StringRef Tok = next(); + expect(")"); + return Tok; +} + +Expr ScriptParser::readPrimary() { + if (peek() == "(") + return readParenExpr(); + + StringRef Tok = next(); + std::string Location = getCurrentLocation(); + + if (Tok == "~") { + Expr E = readPrimary(); + return [=](uint64_t Dot) { return ~E(Dot); }; + } + if (Tok == "-") { + Expr E = readPrimary(); + return [=](uint64_t Dot) { return -E(Dot); }; + } + + // Built-in functions are parsed here. + // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 
+ if (Tok == "ADDR") { + StringRef Name = readParenLiteral(); + return {[=](uint64_t Dot) { + return ScriptBase->getOutputSection(Location, Name)->Addr; + }, + [=] { return false; }, + [=] { return ScriptBase->getOutputSection(Location, Name); }}; + } + if (Tok == "LOADADDR") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { + return ScriptBase->getOutputSection(Location, Name)->getLMA(); + }; + } + if (Tok == "ASSERT") + return readAssert(); + if (Tok == "ALIGN") { + expect("("); + Expr E = readExpr(); + if (consume(",")) { + Expr E2 = readExpr(); + expect(")"); + return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; + } + expect(")"); + return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; + } + if (Tok == "CONSTANT") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { return getConstant(Name); }; + } + if (Tok == "DEFINED") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; + } + if (Tok == "SEGMENT_START") { + expect("("); + skip(); + expect(","); + Expr E = readExpr(); + expect(")"); + return [=](uint64_t Dot) { return E(Dot); }; + } + if (Tok == "DATA_SEGMENT_ALIGN") { + expect("("); + Expr E = readExpr(); + expect(","); + readExpr(); + expect(")"); + return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; + } + if (Tok == "DATA_SEGMENT_END") { + expect("("); + expect("."); + expect(")"); + return [](uint64_t Dot) { return Dot; }; + } + // GNU linkers implements more complicated logic to handle + // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to + // the next page boundary for simplicity. 
+ if (Tok == "DATA_SEGMENT_RELRO_END") { + expect("("); + readExpr(); + expect(","); + readExpr(); + expect(")"); + return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; + } + if (Tok == "SIZEOF") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; + } + if (Tok == "ALIGNOF") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { + return ScriptBase->getOutputSection(Location, Name)->Addralign; + }; + } + if (Tok == "SIZEOF_HEADERS") + return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; + + // Tok is a literal number. + uint64_t V; + if (readInteger(Tok, V)) + return [=](uint64_t Dot) { return V; }; + + // Tok is a symbol name. + if (Tok != "." && !isValidCIdentifier(Tok)) + setError("malformed number: " + Tok); + return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, + [=] { return isAbsolute(Tok); }, + [=] { return ScriptBase->getSymbolSection(Tok); }}; +} + +Expr ScriptParser::readTernary(Expr Cond) { + Expr L = readExpr(); + expect(":"); + Expr R = readExpr(); + return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; +} + +Expr ScriptParser::readParenExpr() { + expect("("); + Expr E = readExpr(); + expect(")"); + return E; +} + +std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { + std::vector<StringRef> Phdrs; + while (!Error && peek().startswith(":")) { + StringRef Tok = next(); + Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); + } + return Phdrs; +} + +// Read a program header type name. The next token must be a +// name of a program header type or a constant (e.g. "0x3"). 
+unsigned ScriptParser::readPhdrType() { + StringRef Tok = next(); + uint64_t Val; + if (readInteger(Tok, Val)) + return Val; + + unsigned Ret = StringSwitch<unsigned>(Tok) + .Case("PT_NULL", PT_NULL) + .Case("PT_LOAD", PT_LOAD) + .Case("PT_DYNAMIC", PT_DYNAMIC) + .Case("PT_INTERP", PT_INTERP) + .Case("PT_NOTE", PT_NOTE) + .Case("PT_SHLIB", PT_SHLIB) + .Case("PT_PHDR", PT_PHDR) + .Case("PT_TLS", PT_TLS) + .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) + .Case("PT_GNU_STACK", PT_GNU_STACK) + .Case("PT_GNU_RELRO", PT_GNU_RELRO) + .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) + .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) + .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) + .Default(-1); + + if (Ret == (unsigned)-1) { + setError("invalid program header type: " + Tok); + return PT_NULL; + } + return Ret; +} + +// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". +void ScriptParser::readAnonymousDeclaration() { + // Read global symbols first. "global:" is default, so if there's + // no label, we assume global symbols. + if (consume("global:") || peek() != "local:") + Config->VersionScriptGlobals = readSymbols(); + + // Next, read local symbols. + if (consume("local:")) { + if (consume("*")) { + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + expect(";"); + } else { + setError("local symbol list for anonymous version is not supported"); + } + } + expect("}"); + expect(";"); +} + +// Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". +void ScriptParser::readVersionDeclaration(StringRef VerStr) { + // Identifiers start at 2 because 0 and 1 are reserved + // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. + uint16_t VersionId = Config->VersionDefinitions.size() + 2; + Config->VersionDefinitions.push_back({VerStr, VersionId}); + + // Read global symbols. + if (consume("global:") || peek() != "local:") + Config->VersionDefinitions.back().Globals = readSymbols(); + + // Read local symbols. 
+ if (consume("local:")) { + if (consume("*")) { + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + expect(";"); + } else { + for (SymbolVersion V : readSymbols()) + Config->VersionScriptLocals.push_back(V); + } + } + expect("}"); + + // Each version may have a parent version. For example, "Ver2" + // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" + // as a parent. This version hierarchy is, probably against your + // instinct, purely for hint; the runtime doesn't care about it + // at all. In LLD, we simply ignore it. + if (peek() != ";") + skip(); + expect(";"); +} + +// Reads a list of symbols for a versions cript. +std::vector<SymbolVersion> ScriptParser::readSymbols() { + std::vector<SymbolVersion> Ret; + for (;;) { + if (consume("extern")) { + for (SymbolVersion V : readVersionExtern()) + Ret.push_back(V); + continue; + } + + if (peek() == "}" || peek() == "local:" || Error) + break; + StringRef Tok = next(); + Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); + expect(";"); + } + return Ret; +} + +// Reads an "extern C++" directive, e.g., +// "extern "C++" { ns::*; "f(int, double)"; };" +std::vector<SymbolVersion> ScriptParser::readVersionExtern() { + StringRef Tok = next(); + bool IsCXX = Tok == "\"C++\""; + if (!IsCXX && Tok != "\"C\"") + setError("Unknown language"); + expect("{"); + + std::vector<SymbolVersion> Ret; + while (!Error && peek() != "}") { + StringRef Tok = next(); + bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); + Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); + expect(";"); + } + + expect("}"); + expect(";"); + return Ret; +} + +void elf::readLinkerScript(MemoryBufferRef MB) { + ScriptParser(MB).readLinkerScript(); +} + +void elf::readVersionScript(MemoryBufferRef MB) { + ScriptParser(MB).readVersionScript(); +} + +void elf::readDynamicList(MemoryBufferRef MB) { + ScriptParser(MB).readDynamicList(); +} + +template class elf::LinkerScript<ELF32LE>; +template class elf::LinkerScript<ELF32BE>; +template 
class elf::LinkerScript<ELF64LE>; +template class elf::LinkerScript<ELF64BE>; diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.h b/contrib/llvm/tools/lld/ELF/LinkerScript.h new file mode 100644 index 000000000000..505162f0ab43 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/LinkerScript.h @@ -0,0 +1,298 @@ +//===- LinkerScript.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_LINKER_SCRIPT_H +#define LLD_ELF_LINKER_SCRIPT_H + +#include "Config.h" +#include "Strings.h" +#include "Writer.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cstddef> +#include <cstdint> +#include <functional> +#include <memory> +#include <vector> + +namespace lld { +namespace elf { + +class DefinedCommon; +class ScriptParser; +class SymbolBody; +template <class ELFT> class InputSectionBase; +template <class ELFT> class InputSection; +class OutputSectionBase; +template <class ELFT> class OutputSectionFactory; +class InputSectionData; + +// This represents an expression in the linker script. +// ScriptParser::readExpr reads an expression and returns an Expr. +// Later, we evaluate the expression by calling the function +// with the value of special context variable ".". +struct Expr { + std::function<uint64_t(uint64_t)> Val; + std::function<bool()> IsAbsolute; + + // If expression is section-relative the function below is used + // to get the output section pointer. 
+ std::function<const OutputSectionBase *()> Section; + + uint64_t operator()(uint64_t Dot) const { return Val(Dot); } + operator bool() const { return (bool)Val; } + + Expr(std::function<uint64_t(uint64_t)> Val, std::function<bool()> IsAbsolute, + std::function<const OutputSectionBase *()> Section) + : Val(Val), IsAbsolute(IsAbsolute), Section(Section) {} + template <typename T> + Expr(T V) : Expr(V, [] { return true; }, [] { return nullptr; }) {} + Expr() : Expr(nullptr) {} +}; + +// Parses a linker script. Calling this function updates +// Config and ScriptConfig. +void readLinkerScript(MemoryBufferRef MB); + +// Parses a version script. +void readVersionScript(MemoryBufferRef MB); + +void readDynamicList(MemoryBufferRef MB); + +// This enum is used to implement linker script SECTIONS command. +// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS +enum SectionsCommandKind { + AssignmentKind, // . = expr or <sym> = expr + OutputSectionKind, + InputSectionKind, + AssertKind, // ASSERT(expr) + BytesDataKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) +}; + +struct BaseCommand { + BaseCommand(int K) : Kind(K) {} + + virtual ~BaseCommand() = default; + + int Kind; +}; + +// This represents ". = <expr>" or "<symbol> = <expr>". +struct SymbolAssignment : BaseCommand { + SymbolAssignment(StringRef Name, Expr E) + : BaseCommand(AssignmentKind), Name(Name), Expression(E) {} + + static bool classof(const BaseCommand *C); + + // The LHS of an expression. Name is either a symbol name or ".". + StringRef Name; + SymbolBody *Sym = nullptr; + + // The RHS of an expression. + Expr Expression; + + // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. + bool Provide = false; + bool Hidden = false; +}; + +// Linker scripts allow additional constraints to be put on ouput sections. +// If an output section is marked as ONLY_IF_RO, the section is created +// only if its input sections are read-only. 
Likewise, an output section +// with ONLY_IF_RW is created if all input sections are RW. +enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; + +struct OutputSectionCommand : BaseCommand { + OutputSectionCommand(StringRef Name) + : BaseCommand(OutputSectionKind), Name(Name) {} + + static bool classof(const BaseCommand *C); + + StringRef Name; + Expr AddrExpr; + Expr AlignExpr; + Expr LMAExpr; + Expr SubalignExpr; + std::vector<std::unique_ptr<BaseCommand>> Commands; + std::vector<StringRef> Phdrs; + uint32_t Filler = 0; + ConstraintKind Constraint = ConstraintKind::NoConstraint; + std::string Location; +}; + +// This struct represents one section match pattern in SECTIONS() command. +// It can optionally have negative match pattern for EXCLUDED_FILE command. +// Also it may be surrounded with SORT() command, so contains sorting rules. +struct SectionPattern { + SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2) + : ExcludedFilePat(Pat1), SectionPat(Pat2) {} + + StringMatcher ExcludedFilePat; + StringMatcher SectionPat; + SortSectionPolicy SortOuter; + SortSectionPolicy SortInner; +}; + +struct InputSectionDescription : BaseCommand { + InputSectionDescription(StringRef FilePattern) + : BaseCommand(InputSectionKind), FilePat(FilePattern) {} + + static bool classof(const BaseCommand *C); + + StringMatcher FilePat; + + // Input sections that matches at least one of SectionPatterns + // will be associated with this InputSectionDescription. + std::vector<SectionPattern> SectionPatterns; + + std::vector<InputSectionData *> Sections; +}; + +// Represents an ASSERT(). +struct AssertCommand : BaseCommand { + AssertCommand(Expr E) : BaseCommand(AssertKind), Expression(E) {} + + static bool classof(const BaseCommand *C); + + Expr Expression; +}; + +// Represents BYTE(), SHORT(), LONG(), or QUAD(). 
+struct BytesDataCommand : BaseCommand { + BytesDataCommand(Expr E, unsigned Size) + : BaseCommand(BytesDataKind), Expression(E), Size(Size) {} + + static bool classof(const BaseCommand *C); + + Expr Expression; + unsigned Offset; + unsigned Size; +}; + +struct PhdrsCommand { + StringRef Name; + unsigned Type; + bool HasFilehdr; + bool HasPhdrs; + unsigned Flags; + Expr LMAExpr; +}; + +class LinkerScriptBase { +protected: + ~LinkerScriptBase() = default; + +public: + virtual uint64_t getHeaderSize() = 0; + virtual uint64_t getSymbolValue(const Twine &Loc, StringRef S) = 0; + virtual bool isDefined(StringRef S) = 0; + virtual bool isAbsolute(StringRef S) = 0; + virtual const OutputSectionBase *getSymbolSection(StringRef S) = 0; + virtual const OutputSectionBase *getOutputSection(const Twine &Loc, + StringRef S) = 0; + virtual uint64_t getOutputSectionSize(StringRef S) = 0; +}; + +// ScriptConfiguration holds linker script parse results. +struct ScriptConfiguration { + // Used to assign addresses to sections. + std::vector<std::unique_ptr<BaseCommand>> Commands; + + // Used to assign sections to headers. + std::vector<PhdrsCommand> PhdrsCommands; + + bool HasSections = false; + + // List of section patterns specified with KEEP commands. They will + // be kept even if they are unused and --gc-sections is specified. + std::vector<InputSectionDescription *> KeptSections; +}; + +extern ScriptConfiguration *ScriptConfig; + +// This is a runner of the linker script. 
// ELFT-specific implementation of LinkerScriptBase. The class is final and
// overrides every pure virtual function of the interface.
template <class ELFT> class LinkerScript final : public LinkerScriptBase {
  typedef typename ELFT::uint uintX_t;

public:
  LinkerScript();
  ~LinkerScript();

  void processCommands(OutputSectionFactory<ELFT> &Factory);
  void addOrphanSections(OutputSectionFactory<ELFT> &Factory);
  void removeEmptyCommands();
  void adjustSectionsBeforeSorting();
  void adjustSectionsAfterSorting();

  std::vector<PhdrEntry> createPhdrs();
  bool ignoreInterpSection();

  uint32_t getFiller(StringRef Name);
  void writeDataBytes(StringRef Name, uint8_t *Buf);
  bool hasLMA(StringRef Name);
  bool shouldKeep(InputSectionBase<ELFT> *S);
  void assignOffsets(OutputSectionCommand *Cmd);
  void placeOrphanSections();
  void assignAddresses(std::vector<PhdrEntry> &Phdrs);
  bool hasPhdrsCommands();

  // LinkerScriptBase interface.
  uint64_t getHeaderSize() override;
  uint64_t getSymbolValue(const Twine &Loc, StringRef S) override;
  bool isDefined(StringRef S) override;
  bool isAbsolute(StringRef S) override;
  const OutputSectionBase *getSymbolSection(StringRef S) override;
  const OutputSectionBase *getOutputSection(const Twine &Loc,
                                            StringRef S) override;
  uint64_t getOutputSectionSize(StringRef S) override;

  // Non-owning pointer without a default initializer; it must be assigned
  // before use.
  std::vector<OutputSectionBase *> *OutputSections;

  int getSectionIndex(StringRef Name);

private:
  void computeInputSections(InputSectionDescription *);

  void addSection(OutputSectionFactory<ELFT> &Factory,
                  InputSectionBase<ELFT> *Sec, StringRef Name);
  void discard(ArrayRef<InputSectionBase<ELFT> *> V);

  std::vector<InputSectionBase<ELFT> *>
  createInputSectionList(OutputSectionCommand &Cmd);

  // "ScriptConfig" is a bit too long, so define a short name for it.
  // The reference is bound when the object is constructed, so ScriptConfig
  // has to point at a valid object by then.
  ScriptConfiguration &Opt = *ScriptConfig;

  std::vector<size_t> getPhdrIndices(StringRef SectionName);
  size_t getPhdrIndex(const Twine &Loc, StringRef PhdrName);

  // Unlike LMAOffset and ThreadBssOffset, Dot has no default initializer.
  uintX_t Dot;
  uintX_t LMAOffset = 0;
  OutputSectionBase *CurOutSec = nullptr;
  uintX_t ThreadBssOffset = 0;
  void switchTo(OutputSectionBase *Sec);
  void flush();
  void output(InputSection<ELFT> *Sec);
  void process(BaseCommand &Base);
  llvm::DenseSet<OutputSectionBase *> AlreadyOutputOS;
  llvm::DenseSet<InputSectionData *> AlreadyOutputIS;
};

// Variable template is a C++14 feature, so we can't template
// a global variable. Use a struct to workaround.
template <class ELFT> struct Script { static LinkerScript<ELFT> *X; };
template <class ELFT> LinkerScript<ELFT> *Script<ELFT>::X;

extern LinkerScriptBase *ScriptBase;

} // end namespace elf
} // end namespace lld

#endif // LLD_ELF_LINKER_SCRIPT_H
diff --git a/contrib/llvm/tools/lld/ELF/MarkLive.cpp b/contrib/llvm/tools/lld/ELF/MarkLive.cpp new file mode 100644 index 000000000000..8d129fc3ff13 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/MarkLive.cpp @@ -0,0 +1,255 @@
//===- MarkLive.cpp -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements --gc-sections, which is a feature to remove unused
// sections from output. Unused sections are sections that are not reachable
// from known GC-root symbols or sections. Naturally the feature is
// implemented as a mark-sweep garbage collector.
//
// Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off
// by default. Starting with GC-root symbols or sections, markLive function
// defined in this file visits all reachable sections to set their Live
// bits.
Writer will then ignore sections whose Live bits are off, so that +// such sections are not included into output. +// +//===----------------------------------------------------------------------===// + +#include "InputSection.h" +#include "LinkerScript.h" +#include "OutputSections.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "Target.h" +#include "Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/ELF.h" +#include <functional> +#include <vector> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +namespace { +// A resolved relocation. The Sec and Offset fields are set if the relocation +// was resolved to an offset within a section. +template <class ELFT> struct ResolvedReloc { + InputSectionBase<ELFT> *Sec; + typename ELFT::uint Offset; +}; +} // end anonymous namespace + +template <class ELFT> +static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec, + const typename ELFT::Rel &Rel) { + return Target->getImplicitAddend(Sec.Data.begin() + Rel.r_offset, + Rel.getType(Config->Mips64EL)); +} + +template <class ELFT> +static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec, + const typename ELFT::Rela &Rel) { + return Rel.r_addend; +} + +template <class ELFT, class RelT> +static ResolvedReloc<ELFT> resolveReloc(InputSectionBase<ELFT> &Sec, + RelT &Rel) { + SymbolBody &B = Sec.getFile()->getRelocTargetSym(Rel); + auto *D = dyn_cast<DefinedRegular<ELFT>>(&B); + if (!D || !D->Section) + return {nullptr, 0}; + typename ELFT::uint Offset = D->Value; + if (D->isSection()) + Offset += getAddend(Sec, Rel); + return {D->Section->Repl, Offset}; +} + +// Calls Fn for each section that Sec refers to via relocations. 
+template <class ELFT> +static void forEachSuccessor(InputSection<ELFT> &Sec, + std::function<void(ResolvedReloc<ELFT>)> Fn) { + if (Sec.AreRelocsRela) { + for (const typename ELFT::Rela &Rel : Sec.relas()) + Fn(resolveReloc(Sec, Rel)); + } else { + for (const typename ELFT::Rel &Rel : Sec.rels()) + Fn(resolveReloc(Sec, Rel)); + } + if (Sec.DependentSection) + Fn({Sec.DependentSection, 0}); +} + +// The .eh_frame section is an unfortunate special case. +// The section is divided in CIEs and FDEs and the relocations it can have are +// * CIEs can refer to a personality function. +// * FDEs can refer to a LSDA +// * FDEs refer to the function they contain information about +// The last kind of relocation cannot keep the referred section alive, or they +// would keep everything alive in a common object file. In fact, each FDE is +// alive if the section it refers to is alive. +// To keep things simple, in here we just ignore the last relocation kind. The +// other two keep the referred section alive. +// +// A possible improvement would be to fully process .eh_frame in the middle of +// the gc pass. With that we would be able to also gc some sections holding +// LSDAs and personality functions if we found that they were unused. +template <class ELFT, class RelTy> +static void +scanEhFrameSection(EhInputSection<ELFT> &EH, ArrayRef<RelTy> Rels, + std::function<void(ResolvedReloc<ELFT>)> Enqueue) { + const endianness E = ELFT::TargetEndianness; + for (unsigned I = 0, N = EH.Pieces.size(); I < N; ++I) { + EhSectionPiece &Piece = EH.Pieces[I]; + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI == (unsigned)-1) + continue; + if (read32<E>(Piece.data().data() + 4) == 0) { + // This is a CIE, we only need to worry about the first relocation. It is + // known to point to the personality function. + Enqueue(resolveReloc(EH, Rels[FirstRelI])); + continue; + } + // This is a FDE. The relocations point to the described function or to + // a LSDA. 
We only need to keep the LSDA alive, so ignore anything that + // points to executable sections. + typename ELFT::uint PieceEnd = Piece.InputOff + Piece.size(); + for (unsigned I2 = FirstRelI, N2 = Rels.size(); I2 < N2; ++I2) { + const RelTy &Rel = Rels[I2]; + if (Rel.r_offset >= PieceEnd) + break; + ResolvedReloc<ELFT> R = resolveReloc(EH, Rels[I2]); + if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) + continue; + if (R.Sec->Flags & SHF_EXECINSTR) + continue; + Enqueue({R.Sec, 0}); + } + } +} + +template <class ELFT> +static void +scanEhFrameSection(EhInputSection<ELFT> &EH, + std::function<void(ResolvedReloc<ELFT>)> Enqueue) { + if (!EH.NumRelocations) + return; + + // Unfortunately we need to split .eh_frame early since some relocations in + // .eh_frame keep other section alive and some don't. + EH.split(); + + if (EH.AreRelocsRela) + scanEhFrameSection(EH, EH.relas(), Enqueue); + else + scanEhFrameSection(EH, EH.rels(), Enqueue); +} + +// We do not garbage-collect two types of sections: +// 1) Sections used by the loader (.init, .fini, .ctors, .dtors or .jcr) +// 2) Non-allocatable sections which typically contain debugging information +template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) { + switch (Sec->Type) { + case SHT_FINI_ARRAY: + case SHT_INIT_ARRAY: + case SHT_NOTE: + case SHT_PREINIT_ARRAY: + return true; + default: + if (!(Sec->Flags & SHF_ALLOC)) + return true; + + // We do not want to reclaim sections if they can be referred + // by __start_* and __stop_* symbols. + StringRef S = Sec->Name; + if (isValidCIdentifier(S)) + return true; + + return S.startswith(".ctors") || S.startswith(".dtors") || + S.startswith(".init") || S.startswith(".fini") || + S.startswith(".jcr"); + } +} + +// This is the main function of the garbage collector. +// Starting from GC-root sections, this function visits all reachable +// sections to set their "Live" bits. 
// Marks every section reachable from the GC roots as live. Roots are the
// entry/init/fini symbols, the symbols listed in Config->Undefined, symbols
// that go into the dynamic symbol table, reserved sections, and sections the
// linker script wants kept. Reachability is followed through a LIFO work
// list of InputSections.
template <class ELFT> void elf::markLive() {
  // Work list of sections whose successors still have to be visited.
  SmallVector<InputSection<ELFT> *, 256> Q;

  auto Enqueue = [&](ResolvedReloc<ELFT> R) {
    // Skip over discarded sections. This in theory shouldn't happen, because
    // the ELF spec doesn't allow a relocation to point to a deduplicated
    // COMDAT section directly. Unfortunately this happens in practice (e.g.
    // .eh_frame) so we need to add a check.
    if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded)
      return;

    // We don't gc non alloc sections.
    if (!(R.Sec->Flags & SHF_ALLOC))
      return;

    // Usually, a whole section is marked as live or dead, but in mergeable
    // (splittable) sections, each piece of data has independent liveness bit.
    // So we explicitly tell it which offset is in use.
    // This runs before the Live check below, so it also happens for a merge
    // section that is already marked live.
    if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(R.Sec))
      MS->markLiveAt(R.Offset);

    if (R.Sec->Live)
      return;
    R.Sec->Live = true;
    // Add input section to the queue. Only plain InputSections are queued;
    // other section kinds are marked live but not traversed.
    if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(R.Sec))
      Q.push_back(S);
  };

  // Enqueues the section of Sym if Sym is a DefinedRegular. A null Sym or a
  // symbol of another kind is ignored.
  auto MarkSymbol = [&](const SymbolBody *Sym) {
    if (auto *D = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym))
      Enqueue({D->Section, D->Value});
  };

  // Add GC root symbols.
  MarkSymbol(Symtab<ELFT>::X->find(Config->Entry));
  MarkSymbol(Symtab<ELFT>::X->find(Config->Init));
  MarkSymbol(Symtab<ELFT>::X->find(Config->Fini));
  for (StringRef S : Config->Undefined)
    MarkSymbol(Symtab<ELFT>::X->find(S));

  // Preserve externally-visible symbols if the symbols defined by this
  // file can interpose other ELF file's symbols at runtime.
  for (const Symbol *S : Symtab<ELFT>::X->getSymbols())
    if (S->includeInDynsym())
      MarkSymbol(S->body());

  // Preserve special sections and those which are specified in linker
  // script KEEP command.
  for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) {
    // .eh_frame is always marked as live now, but also it can reference to
    // sections that contain personality. We preserve all non-text sections
    // referred by .eh_frame here.
    // NOTE(review): dyn_cast_or_null tolerates a null Sec, but isReserved()
    // below dereferences Sec unconditionally -- confirm that Sections never
    // holds a null entry.
    if (auto *EH = dyn_cast_or_null<EhInputSection<ELFT>>(Sec))
      scanEhFrameSection<ELFT>(*EH, Enqueue);
    if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec))
      Enqueue({Sec, 0});
  }

  // Mark all reachable sections.
  while (!Q.empty())
    forEachSuccessor<ELFT>(*Q.pop_back_val(), Enqueue);
}

template void elf::markLive<ELF32LE>();
template void elf::markLive<ELF32BE>();
template void elf::markLive<ELF64LE>();
template void elf::markLive<ELF64BE>();
diff --git a/contrib/llvm/tools/lld/ELF/Memory.h b/contrib/llvm/tools/lld/ELF/Memory.h new file mode 100644 index 000000000000..e5a04ed1e5a8 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Memory.h @@ -0,0 +1,67 @@
//===- Memory.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines arena allocators.
//
// Almost all large objects, such as files, sections or symbols, are
// used for the entire lifetime of the linker once they are created.
// This usage characteristic makes arena allocator an attractive choice
// where the entire linker is one arena. With an arena, newly created
// objects belong to the arena and freed all at once when everything is done.
// Arena allocators are efficient and easy to understand.
// Most objects are allocated using the arena allocators defined by this file.
//
// If you edit this file, please edit COFF/Memory.h too.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_MEMORY_H +#define LLD_ELF_MEMORY_H + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" +#include <vector> + +namespace lld { +namespace elf { + +// Use this arena if your object doesn't have a destructor. +extern llvm::BumpPtrAllocator BAlloc; +extern llvm::StringSaver Saver; + +// These two classes are hack to keep track of all +// SpecificBumpPtrAllocator instances. +struct SpecificAllocBase { + SpecificAllocBase() { Instances.push_back(this); } + virtual ~SpecificAllocBase() = default; + virtual void reset() = 0; + static std::vector<SpecificAllocBase *> Instances; +}; + +template <class T> struct SpecificAlloc : public SpecificAllocBase { + void reset() override { Alloc.DestroyAll(); } + llvm::SpecificBumpPtrAllocator<T> Alloc; +}; + +// Use this arena if your object has a destructor. +// Your destructor will be invoked from freeArena(). +template <typename T, typename... U> T *make(U &&... Args) { + static SpecificAlloc<T> Alloc; + return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...); +} + +inline void freeArena() { + for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances) + Alloc->reset(); + BAlloc.Reset(); +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Mips.cpp b/contrib/llvm/tools/lld/ELF/Mips.cpp new file mode 100644 index 000000000000..ac65672b70fc --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Mips.cpp @@ -0,0 +1,369 @@ +//===- Mips.cpp ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains a helper function for the Writer. 
+// +//===---------------------------------------------------------------------===// + +#include "Error.h" +#include "InputFiles.h" +#include "SymbolTable.h" +#include "Writer.h" + +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/MipsABIFlags.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elf; + +namespace { +struct ArchTreeEdge { + uint32_t Child; + uint32_t Parent; +}; + +struct FileFlags { + StringRef Filename; + uint32_t Flags; +}; +} + +static StringRef getAbiName(uint32_t Flags) { + switch (Flags) { + case 0: + return "n64"; + case EF_MIPS_ABI2: + return "n32"; + case EF_MIPS_ABI_O32: + return "o32"; + case EF_MIPS_ABI_O64: + return "o64"; + case EF_MIPS_ABI_EABI32: + return "eabi32"; + case EF_MIPS_ABI_EABI64: + return "eabi64"; + default: + return "unknown"; + } +} + +static StringRef getNanName(bool IsNan2008) { + return IsNan2008 ? "2008" : "legacy"; +} + +static StringRef getFpName(bool IsFp64) { return IsFp64 ? 
"64" : "32"; } + +static void checkFlags(ArrayRef<FileFlags> Files) { + uint32_t ABI = Files[0].Flags & (EF_MIPS_ABI | EF_MIPS_ABI2); + bool Nan = Files[0].Flags & EF_MIPS_NAN2008; + bool Fp = Files[0].Flags & EF_MIPS_FP64; + + for (const FileFlags &F : Files.slice(1)) { + uint32_t ABI2 = F.Flags & (EF_MIPS_ABI | EF_MIPS_ABI2); + if (ABI != ABI2) + error("target ABI '" + getAbiName(ABI) + "' is incompatible with '" + + getAbiName(ABI2) + "': " + F.Filename); + + bool Nan2 = F.Flags & EF_MIPS_NAN2008; + if (Nan != Nan2) + error("target -mnan=" + getNanName(Nan) + " is incompatible with -mnan=" + + getNanName(Nan2) + ": " + F.Filename); + + bool Fp2 = F.Flags & EF_MIPS_FP64; + if (Fp != Fp2) + error("target -mfp" + getFpName(Fp) + " is incompatible with -mfp" + + getFpName(Fp2) + ": " + F.Filename); + } +} + +static uint32_t getMiscFlags(ArrayRef<FileFlags> Files) { + uint32_t Ret = 0; + for (const FileFlags &F : Files) + Ret |= F.Flags & + (EF_MIPS_ABI | EF_MIPS_ABI2 | EF_MIPS_ARCH_ASE | EF_MIPS_NOREORDER | + EF_MIPS_MICROMIPS | EF_MIPS_NAN2008 | EF_MIPS_32BITMODE); + return Ret; +} + +static uint32_t getPicFlags(ArrayRef<FileFlags> Files) { + // Check PIC/non-PIC compatibility. + bool IsPic = Files[0].Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + for (const FileFlags &F : Files.slice(1)) { + bool IsPic2 = F.Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + if (IsPic && !IsPic2) + warn("linking abicalls code with non-abicalls file: " + F.Filename); + if (!IsPic && IsPic2) + warn("linking non-abicalls code with abicalls file: " + F.Filename); + } + + // Compute the result PIC/non-PIC flag. + uint32_t Ret = Files[0].Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + for (const FileFlags &F : Files.slice(1)) + Ret &= F.Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + + // PIC code is inherently CPIC and may not set CPIC flag explicitly. 
+ if (Ret & EF_MIPS_PIC) + Ret |= EF_MIPS_CPIC; + return Ret; +} + +static ArchTreeEdge ArchTree[] = { + // MIPS32R6 and MIPS64R6 are not compatible with other extensions + // MIPS64R2 extensions. + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_OCTEON3, EF_MIPS_ARCH_64R2}, + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_OCTEON2, EF_MIPS_ARCH_64R2}, + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_OCTEON, EF_MIPS_ARCH_64R2}, + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_LS3A, EF_MIPS_ARCH_64R2}, + // MIPS64 extensions. + {EF_MIPS_ARCH_64 | EF_MIPS_MACH_SB1, EF_MIPS_ARCH_64}, + {EF_MIPS_ARCH_64 | EF_MIPS_MACH_XLR, EF_MIPS_ARCH_64}, + {EF_MIPS_ARCH_64R2, EF_MIPS_ARCH_64}, + // MIPS V extensions. + {EF_MIPS_ARCH_64, EF_MIPS_ARCH_5}, + // R5000 extensions. + {EF_MIPS_ARCH_4 | EF_MIPS_MACH_5500, EF_MIPS_ARCH_4 | EF_MIPS_MACH_5400}, + // MIPS IV extensions. + {EF_MIPS_ARCH_4 | EF_MIPS_MACH_5400, EF_MIPS_ARCH_4}, + {EF_MIPS_ARCH_4 | EF_MIPS_MACH_9000, EF_MIPS_ARCH_4}, + {EF_MIPS_ARCH_5, EF_MIPS_ARCH_4}, + // VR4100 extensions. + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4111, EF_MIPS_ARCH_3 | EF_MIPS_MACH_4100}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4120, EF_MIPS_ARCH_3 | EF_MIPS_MACH_4100}, + // MIPS III extensions. + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4010, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4100, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4650, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_5900, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_LS2E, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_LS2F, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_4, EF_MIPS_ARCH_3}, + // MIPS32 extensions. + {EF_MIPS_ARCH_32R2, EF_MIPS_ARCH_32}, + // MIPS II extensions. + {EF_MIPS_ARCH_3, EF_MIPS_ARCH_2}, + {EF_MIPS_ARCH_32, EF_MIPS_ARCH_2}, + // MIPS I extensions. 
+ {EF_MIPS_ARCH_1 | EF_MIPS_MACH_3900, EF_MIPS_ARCH_1}, + {EF_MIPS_ARCH_2, EF_MIPS_ARCH_1}, +}; + +static bool isArchMatched(uint32_t New, uint32_t Res) { + if (New == Res) + return true; + if (New == EF_MIPS_ARCH_32 && isArchMatched(EF_MIPS_ARCH_64, Res)) + return true; + if (New == EF_MIPS_ARCH_32R2 && isArchMatched(EF_MIPS_ARCH_64R2, Res)) + return true; + for (const auto &Edge : ArchTree) { + if (Res == Edge.Child) { + Res = Edge.Parent; + if (Res == New) + return true; + } + } + return false; +} + +static StringRef getMachName(uint32_t Flags) { + switch (Flags & EF_MIPS_MACH) { + case EF_MIPS_MACH_NONE: + return ""; + case EF_MIPS_MACH_3900: + return "r3900"; + case EF_MIPS_MACH_4010: + return "r4010"; + case EF_MIPS_MACH_4100: + return "r4100"; + case EF_MIPS_MACH_4650: + return "r4650"; + case EF_MIPS_MACH_4120: + return "r4120"; + case EF_MIPS_MACH_4111: + return "r4111"; + case EF_MIPS_MACH_5400: + return "vr5400"; + case EF_MIPS_MACH_5900: + return "vr5900"; + case EF_MIPS_MACH_5500: + return "vr5500"; + case EF_MIPS_MACH_9000: + return "rm9000"; + case EF_MIPS_MACH_LS2E: + return "loongson2e"; + case EF_MIPS_MACH_LS2F: + return "loongson2f"; + case EF_MIPS_MACH_LS3A: + return "loongson3a"; + case EF_MIPS_MACH_OCTEON: + return "octeon"; + case EF_MIPS_MACH_OCTEON2: + return "octeon2"; + case EF_MIPS_MACH_OCTEON3: + return "octeon3"; + case EF_MIPS_MACH_SB1: + return "sb1"; + case EF_MIPS_MACH_XLR: + return "xlr"; + default: + return "unknown machine"; + } +} + +static StringRef getArchName(uint32_t Flags) { + StringRef S = getMachName(Flags); + if (!S.empty()) + return S; + + switch (Flags & EF_MIPS_ARCH) { + case EF_MIPS_ARCH_1: + return "mips1"; + case EF_MIPS_ARCH_2: + return "mips2"; + case EF_MIPS_ARCH_3: + return "mips3"; + case EF_MIPS_ARCH_4: + return "mips4"; + case EF_MIPS_ARCH_5: + return "mips5"; + case EF_MIPS_ARCH_32: + return "mips32"; + case EF_MIPS_ARCH_64: + return "mips64"; + case EF_MIPS_ARCH_32R2: + return "mips32r2"; + case 
EF_MIPS_ARCH_64R2: + return "mips64r2"; + case EF_MIPS_ARCH_32R6: + return "mips32r6"; + case EF_MIPS_ARCH_64R6: + return "mips64r6"; + default: + return "unknown arch"; + } +} + +// There are (arguably too) many MIPS ISAs out there. Their relationships +// can be represented as a forest. If all input files have ISAs which +// reachable by repeated proceeding from the single child to the parent, +// these input files are compatible. In that case we need to return "highest" +// ISA. If there are incompatible input files, we show an error. +// For example, mips1 is a "parent" of mips2 and such files are compatible. +// Output file gets EF_MIPS_ARCH_2 flag. From the other side mips3 and mips32 +// are incompatible because nor mips3 is a parent for misp32, nor mips32 +// is a parent for mips3. +static uint32_t getArchFlags(ArrayRef<FileFlags> Files) { + uint32_t Ret = Files[0].Flags & (EF_MIPS_ARCH | EF_MIPS_MACH); + + for (const FileFlags &F : Files.slice(1)) { + uint32_t New = F.Flags & (EF_MIPS_ARCH | EF_MIPS_MACH); + + // Check ISA compatibility. 
+ if (isArchMatched(New, Ret)) + continue; + if (!isArchMatched(Ret, New)) { + error("target ISA '" + getArchName(Ret) + "' is incompatible with '" + + getArchName(New) + "': " + F.Filename); + return 0; + } + Ret = New; + } + return Ret; +} + +template <class ELFT> uint32_t elf::getMipsEFlags() { + std::vector<FileFlags> V; + for (elf::ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) + V.push_back({F->getName(), F->getObj().getHeader()->e_flags}); + if (V.empty()) + return 0; + checkFlags(V); + return getMiscFlags(V) | getPicFlags(V) | getArchFlags(V); +} + +static int compareMipsFpAbi(uint8_t FpA, uint8_t FpB) { + if (FpA == FpB) + return 0; + if (FpB == Mips::Val_GNU_MIPS_ABI_FP_ANY) + return 1; + if (FpB == Mips::Val_GNU_MIPS_ABI_FP_64A && + FpA == Mips::Val_GNU_MIPS_ABI_FP_64) + return 1; + if (FpB != Mips::Val_GNU_MIPS_ABI_FP_XX) + return -1; + if (FpA == Mips::Val_GNU_MIPS_ABI_FP_DOUBLE || + FpA == Mips::Val_GNU_MIPS_ABI_FP_64 || + FpA == Mips::Val_GNU_MIPS_ABI_FP_64A) + return 1; + return -1; +} + +static StringRef getMipsFpAbiName(uint8_t FpAbi) { + switch (FpAbi) { + case Mips::Val_GNU_MIPS_ABI_FP_ANY: + return "any"; + case Mips::Val_GNU_MIPS_ABI_FP_DOUBLE: + return "-mdouble-float"; + case Mips::Val_GNU_MIPS_ABI_FP_SINGLE: + return "-msingle-float"; + case Mips::Val_GNU_MIPS_ABI_FP_SOFT: + return "-msoft-float"; + case Mips::Val_GNU_MIPS_ABI_FP_OLD_64: + return "-mips32r2 -mfp64 (old)"; + case Mips::Val_GNU_MIPS_ABI_FP_XX: + return "-mfpxx"; + case Mips::Val_GNU_MIPS_ABI_FP_64: + return "-mgp32 -mfp64"; + case Mips::Val_GNU_MIPS_ABI_FP_64A: + return "-mgp32 -mfp64 -mno-odd-spreg"; + default: + return "unknown"; + } +} + +uint8_t elf::getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag, + StringRef FileName) { + if (compareMipsFpAbi(NewFlag, OldFlag) >= 0) + return NewFlag; + if (compareMipsFpAbi(OldFlag, NewFlag) < 0) + error("target floating point ABI '" + getMipsFpAbiName(OldFlag) + + "' is incompatible with '" + getMipsFpAbiName(NewFlag) + "': 
" + + FileName); + return OldFlag; +} + +template <class ELFT> static bool isN32Abi(const InputFile *F) { + if (auto *EF = dyn_cast<ELFFileBase<ELFT>>(F)) + return EF->getObj().getHeader()->e_flags & EF_MIPS_ABI2; + return false; +} + +bool elf::isMipsN32Abi(const InputFile *F) { + switch (Config->EKind) { + case ELF32LEKind: + return isN32Abi<ELF32LE>(F); + case ELF32BEKind: + return isN32Abi<ELF32BE>(F); + case ELF64LEKind: + return isN32Abi<ELF64LE>(F); + case ELF64BEKind: + return isN32Abi<ELF64BE>(F); + default: + llvm_unreachable("unknown Config->EKind"); + } +} + +template uint32_t elf::getMipsEFlags<ELF32LE>(); +template uint32_t elf::getMipsEFlags<ELF32BE>(); +template uint32_t elf::getMipsEFlags<ELF64LE>(); +template uint32_t elf::getMipsEFlags<ELF64BE>(); diff --git a/contrib/llvm/tools/lld/ELF/Options.td b/contrib/llvm/tools/lld/ELF/Options.td new file mode 100644 index 000000000000..d436f056d013 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Options.td @@ -0,0 +1,367 @@ +include "llvm/Option/OptParser.td" + +// For options whose names are multiple letters, either one dash or +// two can precede the option name except those that start with 'o'. 
+class F<string name>: Flag<["--", "-"], name>; +class J<string name>: Joined<["--", "-"], name>; +class S<string name>: Separate<["--", "-"], name>; +class JS<string name>: JoinedOrSeparate<["--", "-"], name>; + +def auxiliary: S<"auxiliary">, HelpText<"Set DT_AUXILIARY field to the specified name">; + +def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind defined symbols locally">; + +def Bsymbolic_functions: F<"Bsymbolic-functions">, + HelpText<"Bind defined function symbols locally">; + +def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries">; + +def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">; + +def build_id: F<"build-id">, HelpText<"Generate build ID note">; + +def build_id_eq: J<"build-id=">, HelpText<"Generate build ID note">; + +def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">, + HelpText<"Add a directory to the library search path">; + +def O: Joined<["-"], "O">, HelpText<"Optimize output file size">; + +def Tbss: S<"Tbss">, HelpText<"Same as --section-start with .bss as the sectionname">; + +def Tdata: S<"Tdata">, HelpText<"Same as --section-start with .data as the sectionname">; + +def Ttext: S<"Ttext">, HelpText<"Same as --section-start with .text as the sectionname">; + +def allow_multiple_definition: F<"allow-multiple-definition">, + HelpText<"Allow multiple definitions">; + +def as_needed: F<"as-needed">, + HelpText<"Only set DT_NEEDED for shared libraries if used">; + +def color_diagnostics: F<"color-diagnostics">, + HelpText<"Use colors in diagnostics">; + +def color_diagnostics_eq: J<"color-diagnostics=">, + HelpText<"Use colors in diagnostics">; + +def disable_new_dtags: F<"disable-new-dtags">, + HelpText<"Disable new dynamic tags">; + +def discard_all: F<"discard-all">, HelpText<"Delete all local symbols">; + +def discard_locals: F<"discard-locals">, + HelpText<"Delete temporary local symbols">; + +def discard_none: F<"discard-none">, + HelpText<"Keep all symbols in the symbol table">; + +def 
dynamic_linker: S<"dynamic-linker">, + HelpText<"Which dynamic linker to use">; + +def dynamic_list: S<"dynamic-list">, + HelpText<"Read a list of dynamic symbols">; + +def eh_frame_hdr: F<"eh-frame-hdr">, + HelpText<"Request creation of .eh_frame_hdr section and PT_GNU_EH_FRAME segment header">; + +def enable_new_dtags: F<"enable-new-dtags">, + HelpText<"Enable new dynamic tags">; + +def end_lib: F<"end-lib">, + HelpText<"End a grouping of objects that should be treated as if they were together in an archive">; + +def entry: S<"entry">, MetaVarName<"<entry>">, + HelpText<"Name of entry point symbol">; + +def error_limit: S<"error-limit">, + HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; + +def export_dynamic: F<"export-dynamic">, + HelpText<"Put symbols in the dynamic symbol table">; + +def export_dynamic_symbol: S<"export-dynamic-symbol">, + HelpText<"Put a symbol in the dynamic symbol table">; + +def fatal_warnings: F<"fatal-warnings">, + HelpText<"Treat warnings as errors">; + +def fini: S<"fini">, MetaVarName<"<symbol>">, + HelpText<"Specify a finalizer function">; + +def full_shutdown : F<"full-shutdown">, + HelpText<"Perform a full shutdown instead of calling _exit">; + +def format: J<"format=">, MetaVarName<"<input-format>">, + HelpText<"Change the input format of the inputs following this option">; + +def gc_sections: F<"gc-sections">, + HelpText<"Enable garbage collection of unused sections">; + +def gdb_index: F<"gdb-index">, + HelpText<"Generate .gdb_index section">; + +def hash_style: S<"hash-style">, + HelpText<"Specify hash style (sysv, gnu or both)">; + +def help: F<"help">, HelpText<"Print option help">; + +def icf: F<"icf=all">, HelpText<"Enable identical code folding">; + +def image_base : J<"image-base=">, HelpText<"Set the base address">; + +def init: S<"init">, MetaVarName<"<symbol>">, + HelpText<"Specify an initializer function">; + +def l: JoinedOrSeparate<["-"], "l">, MetaVarName<"<libName>">, + HelpText<"Root 
name of library to use">; + +def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">, + HelpText<"Optimization level for LTO">; + +def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">; + +def nostdlib: F<"nostdlib">, + HelpText<"Only search directories specified on the command line">; + +def no_as_needed: F<"no-as-needed">, + HelpText<"Always DT_NEEDED for shared libraries">; + +def no_color_diagnostics: F<"no-color-diagnostics">, + HelpText<"Do not use colors in diagnostics">; + +def no_demangle: F<"no-demangle">, + HelpText<"Do not demangle symbol names">; + +def no_gc_sections: F<"no-gc-sections">, + HelpText<"Disable garbage collection of unused sections">; + +def no_gnu_unique: F<"no-gnu-unique">, + HelpText<"Disable STB_GNU_UNIQUE symbol binding">; + +def no_threads: F<"no-threads">, + HelpText<"Do not run the linker multi-threaded">; + +def no_whole_archive: F<"no-whole-archive">, + HelpText<"Restores the default behavior of loading archive members">; + +def noinhibit_exec: F<"noinhibit-exec">, + HelpText<"Retain the executable output file whenever it is still usable">; + +def nopie: F<"nopie">, HelpText<"Do not create a position independent executable">; + +def no_rosegment: F<"no-rosegment">, HelpText<"Do not put read-only non-executable sections in their own segment">; + +def no_undefined: F<"no-undefined">, + HelpText<"Report unresolved symbols even if the linker is creating a shared library">; + +def no_undefined_version: F<"no-undefined-version">, + HelpText<"Report version scripts that refer undefined symbols">; + +def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">, + HelpText<"Path to file to write output">; + +def oformat: Separate<["--"], "oformat">, MetaVarName<"<format>">, + HelpText<"Specify the binary format for the output object file">; + +def omagic: F<"omagic">, MetaVarName<"<magic>">, + HelpText<"Set the text and data sections to be readable and writable">; + +def pie: F<"pie">, HelpText<"Create a position independent 
executable">; + +def print_gc_sections: F<"print-gc-sections">, + HelpText<"List removed unused sections">; + +def reproduce: S<"reproduce">, + HelpText<"Dump linker invocation and input files for debugging">; + +def rpath: S<"rpath">, HelpText<"Add a DT_RUNPATH to the output">; + +def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; + +def retain_symbols_file: J<"retain-symbols-file=">, MetaVarName<"<file>">, + HelpText<"Retain only the symbols listed in the file">; + +def script: S<"script">, HelpText<"Read linker script">; + +def section_start: S<"section-start">, MetaVarName<"<address>">, + HelpText<"Set address of section">; + +def shared: F<"shared">, HelpText<"Build a shared object">; + +def soname: J<"soname=">, HelpText<"Set DT_SONAME">; + +def sort_section: S<"sort-section">, HelpText<"Specifies sections sorting rule when linkerscript is used">; + +def start_lib: F<"start-lib">, + HelpText<"Start a grouping of objects that should be treated as if they were together in an archive">; + +def strip_all: F<"strip-all">, HelpText<"Strip all symbols">; + +def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; + +def symbol_ordering_file: S<"symbol-ordering-file">, + HelpText<"Layout sections in the order specified by symbol file">; + +def sysroot: J<"sysroot=">, HelpText<"Set the system root">; + +def target1_rel: F<"target1-rel">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_REL32">; + +def target1_abs: F<"target1-abs">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_ABS32">; + +def target2: J<"target2=">, MetaVarName<"<type>">, HelpText<"Interpret R_ARM_TARGET2 as <type>, where <type> is one of rel, abs, or got-rel">; + +def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; + +def trace: F<"trace">, HelpText<"Print the names of the input files">; + +def trace_symbol : J<"trace-symbol=">, HelpText<"Trace references to symbols">; + +def undefined: S<"undefined">, + HelpText<"Force undefined symbol during 
linking">; + +def unresolved_symbols: J<"unresolved-symbols=">, + HelpText<"Determine how to handle unresolved symbols">; + +def rsp_quoting: J<"rsp-quoting=">, + HelpText<"Quoting style for response files. Values supported: windows|posix">; + +def v: Flag<["-"], "v">, HelpText<"Display the version number">; + +def verbose: F<"verbose">, HelpText<"Verbose mode">; + +def version: F<"version">, HelpText<"Display the version number and exit">; + +def version_script: S<"version-script">, + HelpText<"Read a version script">; + +def warn_common: F<"warn-common">, + HelpText<"Warn about duplicate common symbols">; + +def whole_archive: F<"whole-archive">, + HelpText<"Force load of all members in a static library">; + +def wrap: S<"wrap">, MetaVarName<"<symbol>">, + HelpText<"Use wrapper functions for symbol">; + +def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">, + HelpText<"Linker option extensions">; + +// Aliases +def alias_auxiliary: Separate<["-"], "f">, Alias<auxiliary>; +def alias_Bdynamic_call_shared: F<"call_shared">, Alias<Bdynamic>; +def alias_Bdynamic_dy: F<"dy">, Alias<Bdynamic>; +def alias_Bstatic_dn: F<"dn">, Alias<Bstatic>; +def alias_Bstatic_non_shared: F<"non_shared">, Alias<Bstatic>; +def alias_Bstatic_static: F<"static">, Alias<Bstatic>; +def alias_L__library_path: J<"library-path=">, Alias<L>; +def alias_discard_all_x: Flag<["-"], "x">, Alias<discard_all>; +def alias_discard_locals_X: Flag<["-"], "X">, Alias<discard_locals>; +def alias_dynamic_list: J<"dynamic-list=">, Alias<dynamic_list>; +def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>; +def alias_entry_entry: J<"entry=">, Alias<entry>; +def alias_error_limit: J<"error-limit=">, Alias<error_limit>; +def alias_export_dynamic_E: Flag<["-"], "E">, Alias<export_dynamic>; +def alias_export_dynamic_symbol: J<"export-dynamic-symbol=">, + Alias<export_dynamic_symbol>; +def alias_fini_fini: J<"fini=">, Alias<fini>; +def alias_format_b: S<"b">, Alias<format>; +def 
alias_hash_style_hash_style: J<"hash-style=">, Alias<hash_style>; +def alias_init_init: J<"init=">, Alias<init>; +def alias_l__library: J<"library=">, Alias<l>; +def alias_omagic: Flag<["-"], "N">, Alias<omagic>; +def alias_o_output: Joined<["--"], "output=">, Alias<o>; +def alias_o_output2 : Separate<["--"], "output">, Alias<o>; +def alias_pie_pic_executable: F<"pic-executable">, Alias<pie>; +def alias_relocatable_r: Flag<["-"], "r">, Alias<relocatable>; +def alias_retain_symbols_file: S<"retain-symbols-file">, Alias<retain_symbols_file>; +def alias_rpath_R: JoinedOrSeparate<["-"], "R">, Alias<rpath>; +def alias_rpath_rpath: J<"rpath=">, Alias<rpath>; +def alias_script_T: JoinedOrSeparate<["-"], "T">, Alias<script>; +def alias_shared_Bshareable: F<"Bshareable">, Alias<shared>; +def alias_soname_h: JoinedOrSeparate<["-"], "h">, Alias<soname>; +def alias_soname_soname: S<"soname">, Alias<soname>; +def alias_sort_section: J<"sort-section=">, Alias<sort_section>; +def alias_script: J<"script=">, Alias<script>; +def alias_strip_all: Flag<["-"], "s">, Alias<strip_all>; +def alias_strip_debug_S: Flag<["-"], "S">, Alias<strip_debug>; +def alias_Tbss: J<"Tbss=">, Alias<Tbss>; +def alias_Tdata: J<"Tdata=">, Alias<Tdata>; +def alias_trace: Flag<["-"], "t">, Alias<trace>; +def alias_trace_symbol_y : JoinedOrSeparate<["-"], "y">, Alias<trace_symbol>; +def alias_Ttext: J<"Ttext=">, Alias<Ttext>; +def alias_Ttext_segment: S<"Ttext-segment">, Alias<Ttext>; +def alias_Ttext_segment_eq: J<"Ttext-segment=">, Alias<Ttext>; +def alias_undefined_eq: J<"undefined=">, Alias<undefined>; +def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias<undefined>; +def alias_version_V: Flag<["-"], "V">, Alias<version>; +def alias_wrap_wrap: J<"wrap=">, Alias<wrap>; + +// Our symbol resolution algorithm handles symbols in archive files differently +// than traditional linkers, so we don't need --start-group and --end-group. +// These options are recognized for compatibility but ignored. 
+def end_group: F<"end-group">; +def end_group_paren: Flag<["-"], ")">; +def start_group: F<"start-group">; +def start_group_paren: Flag<["-"], "(">; + +// Ignore LTO plugin-related options. +// clang -flto passes -plugin and -plugin-opt to the linker. This is required +// for ld.gold and ld.bfd to get LTO working. But it's not for lld which doesn't +// rely on a plugin. Instead of detecting which linker is used on clang side we +// just ignore the option on lld side as it's easier. In fact, the linker could +// be called 'ld' and understanding which linker is used would require parsing of +// --version output. +def plugin: S<"plugin">; +def plugin_eq: J<"plugin=">; +def plugin_opt: S<"plugin-opt">; +def plugin_opt_eq: J<"plugin-opt=">; + +// Options listed below are silently ignored for now for compatibility. +def allow_shlib_undefined: F<"allow-shlib-undefined">; +def cref: Flag<["--"], "cref">; +def define_common: F<"define-common">; +def demangle: F<"demangle">; +def detect_odr_violations: F<"detect-odr-violations">; +def g: Flag<["-"], "g">; +def M: Flag<["-"], "M">; +def Map: JS<"Map">; +def no_add_needed: F<"no-add-needed">; +def no_allow_shlib_undefined: F<"no-allow-shlib-undefined">; +def no_copy_dt_needed_entries: F<"no-copy-dt-needed-entries">, + Alias<no_add_needed>; +def no_dynamic_linker: F<"no-dynamic-linker">; +def no_fatal_warnings: F<"no-fatal-warnings">; +def no_mmap_output_file: F<"no-mmap-output-file">; +def no_warn_common: F<"no-warn-common">; +def no_warn_mismatch: F<"no-warn-mismatch">; +def rpath_link: S<"rpath-link">; +def rpath_link_eq: J<"rpath-link=">; +def sort_common: F<"sort-common">; +def stats: F<"stats">; +def warn_execstack: F<"warn-execstack">; +def warn_shared_textrel: F<"warn-shared-textrel">; +def EB : F<"EB">; +def EL : F<"EL">; +def G: JoinedOrSeparate<["-"], "G">; +def Qy : F<"Qy">; + +// Aliases for ignored options +def alias_define_common_d: Flag<["-"], "d">, Alias<define_common>; +def alias_define_common_dc: F<"dc">, 
Alias<define_common>; +def alias_define_common_dp: F<"dp">, Alias<define_common>; +def alias_Map_eq: J<"Map=">, Alias<Map>; +def alias_version_script_version_script: J<"version-script=">, + Alias<version_script>; + +// LTO-related options. +def lto_aa_pipeline: J<"lto-aa-pipeline=">, + HelpText<"AA pipeline to run during LTO. Used in conjunction with -lto-newpm-passes">; +def lto_newpm_passes: J<"lto-newpm-passes=">, + HelpText<"Passes to run during LTO">; +def lto_partitions: J<"lto-partitions=">, + HelpText<"Number of LTO codegen partitions">; +def disable_verify: F<"disable-verify">; +def mllvm: S<"mllvm">; +def save_temps: F<"save-temps">; +def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">; diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.cpp b/contrib/llvm/tools/lld/ELF/OutputSections.cpp new file mode 100644 index 000000000000..a9d951dcc745 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/OutputSections.cpp @@ -0,0 +1,707 @@ +//===- OutputSections.cpp -------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "OutputSections.h" +#include "Config.h" +#include "EhFrame.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "Threads.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/SHA1.h" + +using namespace llvm; +using namespace llvm::dwarf; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elf; + +OutputSectionBase::OutputSectionBase(StringRef Name, uint32_t Type, + uint64_t Flags) + : Name(Name) { + this->Type = Type; + this->Flags = Flags; + this->Addralign = 1; +} + +uint32_t OutputSectionBase::getPhdrFlags() const { + uint32_t Ret = PF_R; + if (Flags & SHF_WRITE) + Ret |= PF_W; + if (Flags & SHF_EXECINSTR) + Ret |= PF_X; + return Ret; +} + +template <class ELFT> +void OutputSectionBase::writeHeaderTo(typename ELFT::Shdr *Shdr) { + Shdr->sh_entsize = Entsize; + Shdr->sh_addralign = Addralign; + Shdr->sh_type = Type; + Shdr->sh_offset = Offset; + Shdr->sh_flags = Flags; + Shdr->sh_info = Info; + Shdr->sh_link = Link; + Shdr->sh_addr = Addr; + Shdr->sh_size = Size; + Shdr->sh_name = ShName; +} + +template <class ELFT> static uint64_t getEntsize(uint32_t Type) { + switch (Type) { + case SHT_RELA: + return sizeof(typename ELFT::Rela); + case SHT_REL: + return sizeof(typename ELFT::Rel); + case SHT_MIPS_REGINFO: + return sizeof(Elf_Mips_RegInfo<ELFT>); + case SHT_MIPS_OPTIONS: + return sizeof(Elf_Mips_Options<ELFT>) + sizeof(Elf_Mips_RegInfo<ELFT>); + case SHT_MIPS_ABIFLAGS: + return sizeof(Elf_Mips_ABIFlags<ELFT>); + default: + return 0; + } +} + +template <class ELFT> +OutputSection<ELFT>::OutputSection(StringRef Name, uint32_t Type, uintX_t Flags) + : OutputSectionBase(Name, Type, Flags) { + 
this->Entsize = getEntsize<ELFT>(Type); +} + +template <typename ELFT> +static bool compareByFilePosition(InputSection<ELFT> *A, + InputSection<ELFT> *B) { + // Synthetic doesn't have link order dependecy, stable_sort will keep it last + if (A->kind() == InputSectionData::Synthetic || + B->kind() == InputSectionData::Synthetic) + return false; + auto *LA = cast<InputSection<ELFT>>(A->getLinkOrderDep()); + auto *LB = cast<InputSection<ELFT>>(B->getLinkOrderDep()); + OutputSectionBase *AOut = LA->OutSec; + OutputSectionBase *BOut = LB->OutSec; + if (AOut != BOut) + return AOut->SectionIndex < BOut->SectionIndex; + return LA->OutSecOff < LB->OutSecOff; +} + +template <class ELFT> void OutputSection<ELFT>::finalize() { + if ((this->Flags & SHF_LINK_ORDER) && !this->Sections.empty()) { + std::sort(Sections.begin(), Sections.end(), compareByFilePosition<ELFT>); + Size = 0; + assignOffsets(); + + // We must preserve the link order dependency of sections with the + // SHF_LINK_ORDER flag. The dependency is indicated by the sh_link field. We + // need to translate the InputSection sh_link to the OutputSection sh_link, + // all InputSections in the OutputSection have the same dependency. + if (auto *D = this->Sections.front()->getLinkOrderDep()) + this->Link = D->OutSec->SectionIndex; + } + + uint32_t Type = this->Type; + if (!Config->Relocatable || (Type != SHT_RELA && Type != SHT_REL)) + return; + + this->Link = In<ELFT>::SymTab->OutSec->SectionIndex; + // sh_info for SHT_REL[A] sections should contain the section header index of + // the section to which the relocation applies. 
+ InputSectionBase<ELFT> *S = Sections[0]->getRelocatedSection(); + this->Info = S->OutSec->SectionIndex; +} + +template <class ELFT> +void OutputSection<ELFT>::addSection(InputSectionData *C) { + assert(C->Live); + auto *S = cast<InputSection<ELFT>>(C); + Sections.push_back(S); + S->OutSec = this; + this->updateAlignment(S->Alignment); + // Keep sh_entsize value of the input section to be able to perform merging + // later during a final linking using the generated relocatable object. + if (Config->Relocatable && (S->Flags & SHF_MERGE)) + this->Entsize = S->Entsize; +} + +// This function is called after we sort input sections +// and scan relocations to setup sections' offsets. +template <class ELFT> void OutputSection<ELFT>::assignOffsets() { + uintX_t Off = this->Size; + for (InputSection<ELFT> *S : Sections) { + Off = alignTo(Off, S->Alignment); + S->OutSecOff = Off; + Off += S->getSize(); + } + this->Size = Off; +} + +template <class ELFT> +void OutputSection<ELFT>::sort( + std::function<int(InputSection<ELFT> *S)> Order) { + typedef std::pair<unsigned, InputSection<ELFT> *> Pair; + auto Comp = [](const Pair &A, const Pair &B) { return A.first < B.first; }; + + std::vector<Pair> V; + for (InputSection<ELFT> *S : Sections) + V.push_back({Order(S), S}); + std::stable_sort(V.begin(), V.end(), Comp); + Sections.clear(); + for (Pair &P : V) + Sections.push_back(P.second); +} + +// Sorts input sections by section name suffixes, so that .foo.N comes +// before .foo.M if N < M. Used to sort .{init,fini}_array.N sections. +// We want to keep the original order if the priorities are the same +// because the compiler keeps the original initialization order in a +// translation unit and we need to respect that. +// For more detail, read the section of the GCC's manual about init_priority. +template <class ELFT> void OutputSection<ELFT>::sortInitFini() { + // Sort sections by priority. 
+ sort([](InputSection<ELFT> *S) { return getPriority(S->Name); }); +} + +// Returns true if S matches /Filename.?\.o$/. +static bool isCrtBeginEnd(StringRef S, StringRef Filename) { + if (!S.endswith(".o")) + return false; + S = S.drop_back(2); + if (S.endswith(Filename)) + return true; + return !S.empty() && S.drop_back().endswith(Filename); +} + +static bool isCrtbegin(StringRef S) { return isCrtBeginEnd(S, "crtbegin"); } +static bool isCrtend(StringRef S) { return isCrtBeginEnd(S, "crtend"); } + +// .ctors and .dtors are sorted by this priority from highest to lowest. +// +// 1. The section was contained in crtbegin (crtbegin contains +// some sentinel value in its .ctors and .dtors so that the runtime +// can find the beginning of the sections.) +// +// 2. The section has an optional priority value in the form of ".ctors.N" +// or ".dtors.N" where N is a number. Unlike .{init,fini}_array, +// they are compared as string rather than number. +// +// 3. The section is just ".ctors" or ".dtors". +// +// 4. The section was contained in crtend, which contains an end marker. +// +// In an ideal world, we don't need this function because .init_array and +// .ctors are duplicate features (and .init_array is newer.) However, there +// are too many real-world use cases of .ctors, so we had no choice to +// support that with this rather ad-hoc semantics. 
+template <class ELFT> +static bool compCtors(const InputSection<ELFT> *A, + const InputSection<ELFT> *B) { + bool BeginA = isCrtbegin(A->getFile()->getName()); + bool BeginB = isCrtbegin(B->getFile()->getName()); + if (BeginA != BeginB) + return BeginA; + bool EndA = isCrtend(A->getFile()->getName()); + bool EndB = isCrtend(B->getFile()->getName()); + if (EndA != EndB) + return EndB; + StringRef X = A->Name; + StringRef Y = B->Name; + assert(X.startswith(".ctors") || X.startswith(".dtors")); + assert(Y.startswith(".ctors") || Y.startswith(".dtors")); + X = X.substr(6); + Y = Y.substr(6); + if (X.empty() && Y.empty()) + return false; + return X < Y; +} + +// Sorts input sections by the special rules for .ctors and .dtors. +// Unfortunately, the rules are different from the one for .{init,fini}_array. +// Read the comment above. +template <class ELFT> void OutputSection<ELFT>::sortCtorsDtors() { + std::stable_sort(Sections.begin(), Sections.end(), compCtors<ELFT>); +} + +// Fill [Buf, Buf + Size) with Filler. Filler is written in big +// endian order. This is used for linker script "=fillexp" command. +void fill(uint8_t *Buf, size_t Size, uint32_t Filler) { + uint8_t V[4]; + write32be(V, Filler); + size_t I = 0; + for (; I + 4 < Size; I += 4) + memcpy(Buf + I, V, 4); + memcpy(Buf + I, V, Size - I); +} + +template <class ELFT> void OutputSection<ELFT>::writeTo(uint8_t *Buf) { + Loc = Buf; + if (uint32_t Filler = Script<ELFT>::X->getFiller(this->Name)) + fill(Buf, this->Size, Filler); + + auto Fn = [=](InputSection<ELFT> *IS) { IS->writeTo(Buf); }; + forEach(Sections.begin(), Sections.end(), Fn); + + // Linker scripts may have BYTE()-family commands with which you + // can write arbitrary bytes to the output. Process them if any. 
+ Script<ELFT>::X->writeDataBytes(this->Name, Buf); +} + +template <class ELFT> +EhOutputSection<ELFT>::EhOutputSection() + : OutputSectionBase(".eh_frame", SHT_PROGBITS, SHF_ALLOC) {} + +// Search for an existing CIE record or create a new one. +// CIE records from input object files are uniquified by their contents +// and where their relocations point to. +template <class ELFT> +template <class RelTy> +CieRecord *EhOutputSection<ELFT>::addCie(EhSectionPiece &Piece, + ArrayRef<RelTy> Rels) { + auto *Sec = cast<EhInputSection<ELFT>>(Piece.ID); + const endianness E = ELFT::TargetEndianness; + if (read32<E>(Piece.data().data() + 4) != 0) + fatal(toString(Sec) + ": CIE expected at beginning of .eh_frame"); + + SymbolBody *Personality = nullptr; + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI != (unsigned)-1) + Personality = &Sec->getFile()->getRelocTargetSym(Rels[FirstRelI]); + + // Search for an existing CIE by CIE contents/relocation target pair. + CieRecord *Cie = &CieMap[{Piece.data(), Personality}]; + + // If not found, create a new one. + if (Cie->Piece == nullptr) { + Cie->Piece = &Piece; + Cies.push_back(Cie); + } + return Cie; +} + +// There is one FDE per function. Returns true if a given FDE +// points to a live function. +template <class ELFT> +template <class RelTy> +bool EhOutputSection<ELFT>::isFdeLive(EhSectionPiece &Piece, + ArrayRef<RelTy> Rels) { + auto *Sec = cast<EhInputSection<ELFT>>(Piece.ID); + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI == (unsigned)-1) + fatal(toString(Sec) + ": FDE doesn't reference another section"); + const RelTy &Rel = Rels[FirstRelI]; + SymbolBody &B = Sec->getFile()->getRelocTargetSym(Rel); + auto *D = dyn_cast<DefinedRegular<ELFT>>(&B); + if (!D || !D->Section) + return false; + InputSectionBase<ELFT> *Target = D->Section->Repl; + return Target && Target->Live; +} + +// .eh_frame is a sequence of CIE or FDE records. 
// In general, there
// is one CIE record per input object file which is followed by
// a list of FDEs. This function searches for an existing CIE or creates a new
// one and associates FDEs with the CIE.
//
// Sec is the .eh_frame input section whose pieces are processed and Rels are
// its relocations (possibly empty). FDEs that do not point to a live function
// are dropped.
template <class ELFT>
template <class RelTy>
void EhOutputSection<ELFT>::addSectionAux(EhInputSection<ELFT> *Sec,
                                          ArrayRef<RelTy> Rels) {
  const endianness E = ELFT::TargetEndianness;

  // Maps the input offset of each CIE seen so far in this section to the
  // (possibly shared) output CIE record.
  DenseMap<size_t, CieRecord *> OffsetToCie;
  for (EhSectionPiece &Piece : Sec->Pieces) {
    // The empty record is the end marker.
    if (Piece.size() == 4)
      return;

    // The second word is 0 for a CIE; for an FDE it is read as the distance
    // back to the owning CIE (see the CieOffset computation below).
    size_t Offset = Piece.InputOff;
    uint32_t ID = read32<E>(Piece.data().data() + 4);
    if (ID == 0) {
      OffsetToCie[Offset] = addCie(Piece, Rels);
      continue;
    }

    // ID is relative to the position of the ID field itself (Offset + 4).
    uint32_t CieOffset = Offset + 4 - ID;
    CieRecord *Cie = OffsetToCie[CieOffset];
    if (!Cie)
      fatal(toString(Sec) + ": invalid CIE reference");

    if (!isFdeLive(Piece, Rels))
      continue;
    Cie->FdePieces.push_back(&Piece);
    NumFdes++;
  }
}

// Registers the .eh_frame input section C with this output section, splits
// it into CIE/FDE pieces and records those pieces.
template <class ELFT>
void EhOutputSection<ELFT>::addSection(InputSectionData *C) {
  auto *Sec = cast<EhInputSection<ELFT>>(C);
  Sec->OutSec = this;
  this->updateAlignment(Sec->Alignment);
  Sections.push_back(Sec);

  // .eh_frame is a sequence of CIE or FDE records. This function
  // splits it into pieces so that we can call
  // SplitInputSection::getSectionPiece on the section.
  Sec->split();
  if (Sec->Pieces.empty())
    return;

  // Dispatch on the relocation flavor of the section.
  if (Sec->NumRelocations) {
    if (Sec->AreRelocsRela)
      addSectionAux(Sec, Sec->relas());
    else
      addSectionAux(Sec, Sec->rels());
    return;
  }
  // No relocations at all: pass an empty array.
  addSectionAux(Sec, makeArrayRef<Elf_Rela>(nullptr, nullptr));
}

// Copies the CIE or FDE record D to Buf and rewrites its leading 32-bit size
// field so that it matches the size of the record after alignment.
template <class ELFT>
static void writeCieFde(uint8_t *Buf, ArrayRef<uint8_t> D) {
  memcpy(Buf, D.data(), D.size());

  // Fix the size field. -4 since size does not include the size field itself.
  const endianness E = ELFT::TargetEndianness;
  write32<E>(Buf, alignTo(D.size(), sizeof(typename ELFT::uint)) - 4);
}

// Assigns an output offset to every CIE and to the FDEs that follow it, and
// computes the total section size. Each record is padded to a multiple of
// sizeof(uintX_t).
template <class ELFT> void EhOutputSection<ELFT>::finalize() {
  if (this->Size)
    return; // Already finalized.

  size_t Off = 0;
  for (CieRecord *Cie : Cies) {
    Cie->Piece->OutputOff = Off;
    Off += alignTo(Cie->Piece->size(), sizeof(uintX_t));

    for (EhSectionPiece *Fde : Cie->FdePieces) {
      Fde->OutputOff = Off;
      Off += alignTo(Fde->size(), sizeof(uintX_t));
    }
  }
  this->Size = Off;
}

// Reads an address from Buf. Size is one of the DW_EH_PE_* value-format
// constants handled below (the caller passes the FDE encoding masked with
// 0x7); any other value is a fatal error.
template <class ELFT> static uint64_t readFdeAddr(uint8_t *Buf, int Size) {
  const endianness E = ELFT::TargetEndianness;
  switch (Size) {
  case DW_EH_PE_udata2:
    return read16<E>(Buf);
  case DW_EH_PE_udata4:
    return read32<E>(Buf);
  case DW_EH_PE_udata8:
    return read64<E>(Buf);
  case DW_EH_PE_absptr:
    // A pointer-sized value: 8 bytes for 64-bit targets, 4 bytes otherwise.
    if (ELFT::Is64Bits)
      return read64<E>(Buf);
    return read32<E>(Buf);
  }
  fatal("unknown FDE size encoding");
}

// Returns the VA to which a given FDE (on a mmap'ed buffer) is applied to.
// We need it to create .eh_frame_hdr section.
// Buf points to the beginning of the output .eh_frame contents, FdeOff is the
// offset of the FDE in it and Enc is the FDE pointer encoding.
template <class ELFT>
typename ELFT::uint EhOutputSection<ELFT>::getFdePc(uint8_t *Buf, size_t FdeOff,
                                                    uint8_t Enc) {
  // The starting address to which this FDE applies is
  // stored at FDE + 8 byte.
  size_t Off = FdeOff + 8;
  uint64_t Addr = readFdeAddr<ELFT>(Buf + Off, Enc & 0x7);
  if ((Enc & 0x70) == DW_EH_PE_absptr)
    return Addr;
  // A pc-relative value is relative to the address of the field itself.
  if ((Enc & 0x70) == DW_EH_PE_pcrel)
    return Addr + this->Addr + Off;
  fatal("unknown FDE size relative encoding");
}

// Writes all CIEs and FDEs to Buf, applies relocations and, if an
// .eh_frame_hdr section exists, feeds it the (PC, FDE address) pairs.
template <class ELFT> void EhOutputSection<ELFT>::writeTo(uint8_t *Buf) {
  const endianness E = ELFT::TargetEndianness;
  for (CieRecord *Cie : Cies) {
    size_t CieOffset = Cie->Piece->OutputOff;
    writeCieFde<ELFT>(Buf + CieOffset, Cie->Piece->data());

    for (EhSectionPiece *Fde : Cie->FdePieces) {
      size_t Off = Fde->OutputOff;
      writeCieFde<ELFT>(Buf + Off, Fde->data());

      // FDE's second word should have the offset to an associated CIE.
      // Write it.
      write32<E>(Buf + Off + 4, Off + 4 - CieOffset);
    }
  }

  // Relocations have to be applied before the FDE addresses are read back
  // from the buffer below.
  for (EhInputSection<ELFT> *S : Sections)
    S->relocate(Buf, nullptr);

  // Construct .eh_frame_hdr. .eh_frame_hdr is a binary search table
  // to get a FDE from an address to which FDE is applied. So here
  // we obtain two addresses and pass them to EhFrameHdr object.
  if (In<ELFT>::EhFrameHdr) {
    for (CieRecord *Cie : Cies) {
      uint8_t Enc = getFdeEncoding<ELFT>(Cie->Piece);
      for (SectionPiece *Fde : Cie->FdePieces) {
        uintX_t Pc = getFdePc(Buf, Fde->OutputOff, Enc);
        uintX_t FdeVA = this->Addr + Fde->OutputOff;
        In<ELFT>::EhFrameHdr->addFde(Pc, FdeVA);
      }
    }
  }
}

// Creates an output section for mergeable input sections. The contents are
// accumulated in a raw string table builder that uses the given alignment.
template <class ELFT>
MergeOutputSection<ELFT>::MergeOutputSection(StringRef Name, uint32_t Type,
                                             uintX_t Flags, uintX_t Alignment)
    : OutputSectionBase(Name, Type, Flags),
      Builder(StringTableBuilder::RAW, Alignment) {}

// Writes the merged contents held by the string table builder to Buf.
template <class ELFT> void MergeOutputSection<ELFT>::writeTo(uint8_t *Buf) {
  Builder.write(Buf);
}

// Registers the mergeable input section C with this output section. The
// entry size of the output section is taken from the input section.
template <class ELFT>
void MergeOutputSection<ELFT>::addSection(InputSectionData *C) {
  auto *Sec = cast<MergeInputSection<ELFT>>(C);
  Sec->OutSec = this;
  this->updateAlignment(Sec->Alignment);
  this->Entsize = Sec->Entsize;
  Sections.push_back(Sec);
}

// Tail merging is only done for string sections (SHF_STRINGS) and only when
// the optimization level is at least 2.
template <class ELFT> bool MergeOutputSection<ELFT>::shouldTailMerge() const {
  return (this->Flags & SHF_STRINGS) && Config->Optimize >= 2;
}

// Lays out the section with tail merging enabled.
template <class ELFT> void MergeOutputSection<ELFT>::finalizeTailMerge() {
  // Add all string pieces to the string table builder to create section
  // contents.
  for (MergeInputSection<ELFT> *Sec : Sections)
    for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
      if (Sec->Pieces[I].Live)
        Builder.add(Sec->getData(I));

  // Fix the string table content. After this, the contents will never change.
  Builder.finalize();
  this->Size = Builder.getSize();

  // finalize() fixed tail-optimized strings, so we can now get
  // offsets of strings. Get an offset for each string and save it
  // to a corresponding StringPiece for easy access.
  for (MergeInputSection<ELFT> *Sec : Sections)
    for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
      if (Sec->Pieces[I].Live)
        Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I));
}

// Lays out the section without tail merging.
template <class ELFT> void MergeOutputSection<ELFT>::finalizeNoTailMerge() {
  // Add all string pieces to the string table builder to create section
  // contents. Because we are not tail-optimizing, offsets of strings are
  // fixed when they are added to the builder (string table builder contains
  // a hash table from strings to offsets).
  for (MergeInputSection<ELFT> *Sec : Sections)
    for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
      if (Sec->Pieces[I].Live)
        Sec->Pieces[I].OutputOff = Builder.add(Sec->getData(I));

  Builder.finalizeInOrder();
  this->Size = Builder.getSize();
}

// Fixes the section contents and size, with or without tail merging.
template <class ELFT> void MergeOutputSection<ELFT>::finalize() {
  if (shouldTailMerge())
    finalizeTailMerge();
  else
    finalizeNoTailMerge();
}

// Returns the flags of the input section S that are propagated to its output
// section: everything except SHF_GROUP and SHF_COMPRESSED.
template <class ELFT>
static typename ELFT::uint getOutFlags(InputSectionBase<ELFT> *S) {
  return S->Flags & ~SHF_GROUP & ~SHF_COMPRESSED;
}

// DenseMapInfo specialization that allows SectionKey to be used as the key
// of a DenseMap. The member functions are defined further below.
namespace llvm {
template <> struct DenseMapInfo<lld::elf::SectionKey> {
  static lld::elf::SectionKey getEmptyKey();
  static lld::elf::SectionKey getTombstoneKey();
  static unsigned getHashValue(const lld::elf::SectionKey &Val);
  static bool isEqual(const lld::elf::SectionKey &LHS,
                      const lld::elf::SectionKey &RHS);
};
}

// Builds the key that decides which output section the input section C is
// placed in, given the output section name OutsecName.
template <class ELFT>
static SectionKey createKey(InputSectionBase<ELFT> *C, StringRef OutsecName) {
  //  The ELF spec just says
  // ----------------------------------------------------------------
  // In the first phase, input sections that match in name, type and
  // attribute flags should be concatenated into single sections.
  // ----------------------------------------------------------------
  //
  // However, it is clear that at least some flags have to be ignored for
  // section merging. At the very least SHF_GROUP and SHF_COMPRESSED have to be
  // ignored. We should not have two output .text sections just because one was
  // in a group and another was not for example.
  //
  // It also seems that that wording was a late addition and didn't get the
  // necessary scrutiny.
  //
  // Merging sections with different flags is expected by some users. One
  // reason is that if one file has
  //
  // int *const bar __attribute__((section(".foo"))) = (int *)0;
  //
  // gcc with -fPIC will produce a read only .foo section. But if another
  // file has
  //
  // int zed;
  // int *const bar __attribute__((section(".foo"))) = (int *)&zed;
  //
  // gcc with -fPIC will produce a read write section.
  //
  // Last but not least, when using linker script the merge rules are forced by
  // the script. Unfortunately, linker scripts are name based. This means that
  // expressions like *(.foo*) can refer to multiple input sections with
  // different flags. We cannot put them in different output sections or we
  // would produce wrong results for
  //
  // start = .; *(.foo.*) end = .; *(.bar)
  //
  // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to
  // another. The problem is that there is no way to layout those output
  // sections such that the .foo sections are the only thing between the start
  // and end symbols.
  //
  // Given the above issues, we instead merge sections by name and error on
  // incompatible types and flags.
  //
  // The exception being SHF_MERGE, where we create different output sections
  // for each alignment. This makes each output section simple. In case of
  // relocatable object generation we do not try to perform merging and treat
  // SHF_MERGE sections as regular ones, but also create different output
  // sections for them to allow merging at final linking stage.
  //
  // Fortunately, creating symbols in the middle of a merge section is not
  // supported by bfd or gold, so the SHF_MERGE exception should not cause
  // problems with most linker scripts.

  // Only the merge-related flags take part in the key.
  typedef typename ELFT::uint uintX_t;
  uintX_t Flags = C->Flags & (SHF_MERGE | SHF_STRINGS);

  // The alignment component stays 0 unless C is a merge section (or an
  // SHF_MERGE section in a relocatable link).
  uintX_t Alignment = 0;
  if (isa<MergeInputSection<ELFT>>(C) ||
      (Config->Relocatable && (C->Flags & SHF_MERGE)))
    Alignment = std::max<uintX_t>(C->Alignment, C->Entsize);

  return SectionKey{OutsecName, Flags, Alignment};
}

template <class ELFT> OutputSectionFactory<ELFT>::OutputSectionFactory() {}

template <class ELFT> OutputSectionFactory<ELFT>::~OutputSectionFactory() {}

// Returns the output section named OutsecName that the input section C
// belongs to, creating it if necessary. The bool is true if a new output
// section was created by this call.
template <class ELFT>
std::pair<OutputSectionBase *, bool>
OutputSectionFactory<ELFT>::create(InputSectionBase<ELFT> *C,
                                   StringRef OutsecName) {
  SectionKey Key = createKey(C, OutsecName);
  return create(Key, C);
}

// Extracts the flags that must agree between all input sections that end up
// in the same output section.
static uint64_t getIncompatibleFlags(uint64_t Flags) {
  return Flags & (SHF_ALLOC | SHF_TLS);
}

// Same as above, but with an already computed key.
template <class ELFT>
std::pair<OutputSectionBase *, bool>
OutputSectionFactory<ELFT>::create(const SectionKey &Key,
                                   InputSectionBase<ELFT> *C) {
  uintX_t Flags = getOutFlags(C);
  OutputSectionBase *&Sec = Map[Key];
  if (Sec) {
    // Reuse the existing section; report mismatching flags or types and
    // accumulate the flags of the new member.
    if (getIncompatibleFlags(Sec->Flags) != getIncompatibleFlags(C->Flags))
      error("Section has flags incompatible with others with the same name " +
            toString(C));
    if (Sec->Type != C->Type)
      error("Section has different type from others with the same name " +
            toString(C));
    Sec->Flags |= Flags;
    return {Sec, false};
  }

  // No section for this key yet: create one that matches the kind of C.
  uint32_t Type = C->Type;
  switch (C->kind()) {
  case InputSectionBase<ELFT>::Regular:
  case InputSectionBase<ELFT>::Synthetic:
    Sec = make<OutputSection<ELFT>>(Key.Name, Type, Flags);
    break;
  case InputSectionBase<ELFT>::EHFrame:
    // All .eh_frame input sections go to the single Out<ELFT>::EhFrame
    // section, which is not stored in Map.
    return {Out<ELFT>::EhFrame, false};
  case InputSectionBase<ELFT>::Merge:
    Sec = make<MergeOutputSection<ELFT>>(Key.Name, Type, Flags, Key.Alignment);
    break;
  }
  return {Sec, true};
}

// The empty and tombstone keys reuse the corresponding StringRef keys for the
// name, with zero flags and alignment.
SectionKey DenseMapInfo<SectionKey>::getEmptyKey() {
  return SectionKey{DenseMapInfo<StringRef>::getEmptyKey(), 0, 0};
}

SectionKey DenseMapInfo<SectionKey>::getTombstoneKey() {
  return SectionKey{DenseMapInfo<StringRef>::getTombstoneKey(), 0, 0};
}

unsigned DenseMapInfo<SectionKey>::getHashValue(const SectionKey &Val) {
  return hash_combine(Val.Name, Val.Flags, Val.Alignment);
}

bool DenseMapInfo<SectionKey>::isEqual(const SectionKey &LHS,
                                       const SectionKey &RHS) {
  return DenseMapInfo<StringRef>::isEqual(LHS.Name, RHS.Name) &&
         LHS.Flags == RHS.Flags && LHS.Alignment == RHS.Alignment;
}

// Explicit instantiations for the four supported ELF flavors.
namespace lld {
namespace elf {

template void OutputSectionBase::writeHeaderTo<ELF32LE>(ELF32LE::Shdr *Shdr);
template void OutputSectionBase::writeHeaderTo<ELF32BE>(ELF32BE::Shdr *Shdr);
template void OutputSectionBase::writeHeaderTo<ELF64LE>(ELF64LE::Shdr *Shdr);
template void OutputSectionBase::writeHeaderTo<ELF64BE>(ELF64BE::Shdr *Shdr);

template class OutputSection<ELF32LE>;
template class OutputSection<ELF32BE>;
template class OutputSection<ELF64LE>;
template class OutputSection<ELF64BE>;

template class EhOutputSection<ELF32LE>;
template class EhOutputSection<ELF32BE>;
template class EhOutputSection<ELF64LE>;
template class EhOutputSection<ELF64BE>;

template class MergeOutputSection<ELF32LE>;
template class MergeOutputSection<ELF32BE>;
template class MergeOutputSection<ELF64LE>;
template class MergeOutputSection<ELF64BE>;

template class OutputSectionFactory<ELF32LE>;
template class OutputSectionFactory<ELF32BE>;
template class OutputSectionFactory<ELF64LE>;
template class OutputSectionFactory<ELF64BE>;
}
}
diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.h b/contrib/llvm/tools/lld/ELF/OutputSections.h new file mode 100644 index 000000000000..45e1a232e2a9 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/OutputSections.h @@ -0,0 +1,268 @@ +//===- OutputSections.h 
-----------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_OUTPUT_SECTIONS_H +#define LLD_ELF_OUTPUT_SECTIONS_H + +#include "Config.h" +#include "Relocations.h" + +#include "lld/Core/LLVM.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ELF.h" + +namespace lld { +namespace elf { + +struct PhdrEntry; +class SymbolBody; +struct EhSectionPiece; +template <class ELFT> class EhInputSection; +template <class ELFT> class InputSection; +template <class ELFT> class InputSectionBase; +template <class ELFT> class MergeInputSection; +template <class ELFT> class OutputSection; +template <class ELFT> class ObjectFile; +template <class ELFT> class SharedFile; +template <class ELFT> class SharedSymbol; +template <class ELFT> class DefinedRegular; + +// This represents a section in an output file. +// Different sub classes represent different types of sections. Some contain +// input sections, others are created by the linker. +// The writer creates multiple OutputSections and assign them unique, +// non-overlapping file offsets and VAs. 
+class OutputSectionBase {
+public:
+  // Discriminator reported by getKind(); each subclass's classof() tests for
+  // its own value.
+  enum Kind {
+    Base,
+    EHFrame,
+    Merge,
+    Regular,
+  };
+
+  OutputSectionBase(StringRef Name, uint32_t Type, uint64_t Flags);
+  // The LMA is the section address (Addr) plus a separately stored offset.
+  void setLMAOffset(uint64_t LMAOff) { LMAOffset = LMAOff; }
+  uint64_t getLMA() const { return Addr + LMAOffset; }
+  template <typename ELFT> void writeHeaderTo(typename ELFT::Shdr *SHdr);
+  StringRef getName() const { return Name; }
+
+  // No-op in the base class; overridden by the subclasses below.
+  virtual void addSection(InputSectionData *C) {}
+  virtual Kind getKind() const { return Base; }
+  static bool classof(const OutputSectionBase *B) {
+    return B->getKind() == Base;
+  }
+
+  // Has no initializer here.
+  // NOTE(review): presumably assigned by the writer before use - confirm.
+  unsigned SectionIndex;
+
+  uint32_t getPhdrFlags() const;
+
+  // Raises Addralign to Alignment if that is larger; never lowers it.
+  void updateAlignment(uint64_t Alignment) {
+    if (Alignment > Addralign)
+      Addralign = Alignment;
+  }
+
+  // If true, this section will be page aligned on disk.
+  // Typically the first section of each PT_LOAD segment has this flag.
+  bool PageAlign = false;
+
+  // Pointer to the first section in PT_LOAD segment, which this section
+  // also resides in. This field is used to correctly compute file offset
+  // of a section. When two sections share the same load segment, difference
+  // between their file offsets should be equal to difference between their
+  // virtual addresses. To compute some section offset we use the following
+  // formula: Off = Off_first + VA - VA_first.
+  OutputSectionBase *FirstInPtLoad = nullptr;
+
+  // Hooks with empty default implementations; subclasses override them.
+  virtual void finalize() {}
+  virtual void assignOffsets() {}
+  virtual void writeTo(uint8_t *Buf) {}
+  virtual ~OutputSectionBase() = default;
+
+  StringRef Name;
+
+  // The following fields correspond to Elf_Shdr members.
+  uint64_t Size = 0;
+  uint64_t Entsize = 0;
+  uint64_t Addralign = 0;
+  uint64_t Offset = 0;
+  uint64_t Flags = 0;
+  uint64_t LMAOffset = 0;
+  uint64_t Addr = 0;
+  uint32_t ShName = 0;
+  uint32_t Type = 0;
+  uint32_t Info = 0;
+  uint32_t Link = 0;
+};
+
+// Output section of kind Regular. It keeps the InputSection<ELFT> objects
+// added to it in Sections.
+template <class ELFT> class OutputSection final : public OutputSectionBase {
+
+public:
+  typedef typename ELFT::Shdr Elf_Shdr;
+  typedef typename ELFT::Sym Elf_Sym;
+  typedef typename ELFT::Rel Elf_Rel;
+  typedef typename ELFT::Rela Elf_Rela;
+  typedef typename ELFT::uint uintX_t;
+  OutputSection(StringRef Name, uint32_t Type, uintX_t Flags);
+  void addSection(InputSectionData *C) override;
+  // Order maps an input section to an int.
+  // NOTE(review): presumably a sort priority - confirm in the definition.
+  void sort(std::function<int(InputSection<ELFT> *S)> Order);
+  void sortInitFini();
+  void sortCtorsDtors();
+  void writeTo(uint8_t *Buf) override;
+  void finalize() override;
+  void assignOffsets() override;
+  Kind getKind() const override { return Regular; }
+  static bool classof(const OutputSectionBase *B) {
+    return B->getKind() == Regular;
+  }
+  std::vector<InputSection<ELFT> *> Sections;
+
+  // Location in the output buffer.
+  uint8_t *Loc = nullptr;
+};
+
+// Output section of kind Merge. It owns a StringTableBuilder and the list of
+// MergeInputSection<ELFT> objects added to it.
+template <class ELFT>
+class MergeOutputSection final : public OutputSectionBase {
+  typedef typename ELFT::uint uintX_t;
+
+public:
+  MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags,
+                     uintX_t Alignment);
+  void addSection(InputSectionData *S) override;
+  void writeTo(uint8_t *Buf) override;
+  void finalize() override;
+  bool shouldTailMerge() const;
+  Kind getKind() const override { return Merge; }
+  static bool classof(const OutputSectionBase *B) {
+    return B->getKind() == Merge;
+  }
+
+private:
+  // NOTE(review): presumably the two finalize() paths, selected by
+  // shouldTailMerge() - confirm in the definition.
+  void finalizeTailMerge();
+  void finalizeNoTailMerge();
+
+  llvm::StringTableBuilder Builder;
+  std::vector<MergeInputSection<ELFT> *> Sections;
+};
+
+// Groups one section piece (Piece) with a list of further pieces (FdePieces);
+// presumably a CIE and the FDEs that refer to it.
+struct CieRecord {
+  EhSectionPiece *Piece = nullptr;
+  std::vector<EhSectionPiece *> FdePieces;
+};
+
+// Output section for .eh_frame.
+template <class ELFT> class EhOutputSection final : public OutputSectionBase {
+  typedef typename ELFT::uint uintX_t;
+  typedef typename ELFT::Shdr Elf_Shdr;
+  typedef typename ELFT::Rel Elf_Rel;
+  typedef typename ELFT::Rela Elf_Rela;
+
+public:
+  EhOutputSection();
+  void writeTo(uint8_t *Buf) override;
+  void finalize() override;
+  // True while Sections holds no input section.
+  bool empty() const { return Sections.empty(); }
+
+  void addSection(InputSectionData *S) override;
+  Kind getKind() const override { return EHFrame; }
+  static bool classof(const OutputSectionBase *B) {
+    return B->getKind() == EHFrame;
+  }
+
+  size_t NumFdes = 0;
+
+private:
+  // Helpers templated on the relocation record type (RelTy).
+  template <class RelTy>
+  void addSectionAux(EhInputSection<ELFT> *S, llvm::ArrayRef<RelTy> Rels);
+
+  template <class RelTy>
+  CieRecord *addCie(EhSectionPiece &Piece, ArrayRef<RelTy> Rels);
+
+  template <class RelTy>
+  bool isFdeLive(EhSectionPiece &Piece, ArrayRef<RelTy> Rels);
+
+  uintX_t getFdePc(uint8_t *Buf, size_t Off, uint8_t Enc);
+
+  std::vector<EhInputSection<ELFT> *> Sections;
+  std::vector<CieRecord *> Cies;
+
+  // CIE records are uniquified by their contents and personality functions.
+  llvm::DenseMap<std::pair<ArrayRef<uint8_t>, SymbolBody *>, CieRecord> CieMap;
+};
+
+// All output sections that are handled by the linker specially are
+// globally accessible. Writer initializes them, so don't use them
+// until Writer is initialized.
+template <class ELFT> struct Out {
+  typedef typename ELFT::uint uintX_t;
+  typedef typename ELFT::Phdr Elf_Phdr;
+
+  // All members are static; the out-of-class definitions follow at the end of
+  // this header.
+  static uint8_t First;
+  static EhOutputSection<ELFT> *EhFrame;
+  static OutputSection<ELFT> *Bss;
+  static OutputSectionBase *Opd;
+  static uint8_t *OpdBuf;
+  static PhdrEntry *TlsPhdr;
+  static OutputSectionBase *DebugInfo;
+  static OutputSectionBase *ElfHeader;
+  static OutputSectionBase *ProgramHeaders;
+  static OutputSectionBase *PreinitArray;
+  static OutputSectionBase *InitArray;
+  static OutputSectionBase *FiniArray;
+};
+
+// Key type of OutputSectionFactory's Map.
+struct SectionKey {
+  StringRef Name;
+  uint64_t Flags;
+  uint64_t Alignment;
+};
+
+// This class knows how to create an output section for a given
+// input section. Output section type is determined by various
+// factors, including input section's sh_flags, sh_type and
+// linker scripts.
+template <class ELFT> class OutputSectionFactory {
+  typedef typename ELFT::Shdr Elf_Shdr;
+  typedef typename ELFT::uint uintX_t;
+
+public:
+  OutputSectionFactory();
+  ~OutputSectionFactory();
+  // Both overloads return an output section together with a bool.
+  // NOTE(review): the bool presumably tells whether the section was newly
+  // created - confirm in the definition.
+  std::pair<OutputSectionBase *, bool> create(InputSectionBase<ELFT> *C,
+                                              StringRef OutsecName);
+  std::pair<OutputSectionBase *, bool> create(const SectionKey &Key,
+                                              InputSectionBase<ELFT> *C);
+
+private:
+  llvm::SmallDenseMap<SectionKey, OutputSectionBase *> Map;
+};
+
+// Returns the combined size of the ELF header and the program headers, or 0
+// when Config->OFormatBinary is set.
+template <class ELFT> uint64_t getHeaderSize() {
+  if (Config->OFormatBinary)
+    return 0;
+  return Out<ELFT>::ElfHeader->Size + Out<ELFT>::ProgramHeaders->Size;
+}
+
+// Definitions of the static data members of Out<ELFT>.
+template <class ELFT> uint8_t Out<ELFT>::First;
+template <class ELFT> EhOutputSection<ELFT> *Out<ELFT>::EhFrame;
+template <class ELFT> OutputSection<ELFT> *Out<ELFT>::Bss;
+template <class ELFT> OutputSectionBase *Out<ELFT>::Opd;
+template <class ELFT> uint8_t *Out<ELFT>::OpdBuf;
+template <class ELFT> PhdrEntry *Out<ELFT>::TlsPhdr;
+template <class ELFT> OutputSectionBase *Out<ELFT>::DebugInfo;
+template <class ELFT> OutputSectionBase *Out<ELFT>::ElfHeader;
+template <class ELFT> OutputSectionBase *Out<ELFT>::ProgramHeaders;
+template <class ELFT> OutputSectionBase *Out<ELFT>::PreinitArray;
+template <class ELFT> OutputSectionBase *Out<ELFT>::InitArray;
+template <class ELFT> OutputSectionBase *Out<ELFT>::FiniArray;
+} // namespace elf
+} // namespace lld
+
+
+#endif
diff --git a/contrib/llvm/tools/lld/ELF/README.md b/contrib/llvm/tools/lld/ELF/README.md
new file mode 100644
index 000000000000..f1bfc9c15263
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/README.md
@@ -0,0 +1 @@
+See docs/NewLLD.rst
diff --git a/contrib/llvm/tools/lld/ELF/Relocations.cpp b/contrib/llvm/tools/lld/ELF/Relocations.cpp
new file mode 100644
index 000000000000..f7dcc5d24e93
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Relocations.cpp
@@ -0,0 +1,825 @@
+//===- Relocations.cpp ----------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains platform-independent functions to process relocations.
+// I'll describe the overview of this file here.
+//
+// Simple relocations are easy to handle for the linker. For example,
+// for R_X86_64_PC64 relocs, the linker just has to fix up locations
+// with the relative offsets to the target symbols. It would just be
+// reading records from relocation sections and applying them to output.
+//
+// But not all relocations are that easy to handle. For example, for
+// R_386_GOTOFF relocs, the linker has to create new GOT entries for
+// symbols if they don't exist, and fix up locations with GOT entry
+// offsets from the beginning of GOT section. So there is more than
+// fixing addresses in relocation processing.
+//
+// ELF defines a large number of complex relocations.
+//
+// The functions in this file analyze relocations and do whatever needs
+// to be done. 
It includes, but is not limited to, the following.
+//
+//  - create GOT/PLT entries
+//  - create new relocations in .dynsym to let the dynamic linker resolve
+//    them at runtime (since ELF supports dynamic linking, not all
+//    relocations can be resolved at link-time)
+//  - create COPY relocs and reserve space in .bss
+//  - replace expensive relocs (in terms of runtime cost) with cheap ones
+//  - error out infeasible combinations such as PIC and non-relative relocs
+//
+// Note that the functions in this file don't actually apply relocations
+// because it doesn't know about the output file nor the output file buffer.
+// It instead stores Relocation objects to InputSection's Relocations
+// vector to let it apply later in InputSection::writeTo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Relocations.h"
+#include "Config.h"
+#include "OutputSections.h"
+#include "Strings.h"
+#include "SymbolTable.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Thunks.h"
+
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+
+namespace lld {
+namespace elf {
+
+// True if Expr is one of the GOT-referencing expression kinds listed here.
+static bool refersToGotEntry(RelExpr Expr) {
+  return isRelExprOneOf<R_GOT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF,
+                        R_MIPS_GOT_OFF32, R_MIPS_TLSGD, R_MIPS_TLSLD,
+                        R_GOT_PAGE_PC, R_GOT_PC, R_GOT_FROM_END, R_TLSGD,
+                        R_TLSGD_PC, R_TLSDESC, R_TLSDESC_PAGE>(Expr);
+}
+
+// Like Body.isPreemptible(), except that it is always false for a MIPS
+// R_MIPS_GPREL16 relocation.
+static bool isPreemptible(const SymbolBody &Body, uint32_t Type) {
+  // In case of MIPS GP-relative relocations always resolve to a definition
+  // in a regular input file, ignoring the one-definition rule. So we,
+  // for example, should not attempt to create a dynamic relocation even
+  // if the target symbol is preemptible. There are two MIPS GP-relative
+  // relocations R_MIPS_GPREL16 and R_MIPS_GPREL32. But only R_MIPS_GPREL16
+  // can be against a preemptible symbol.
+  // To get MIPS relocation type we apply 0xff mask. In case of O32 ABI all
+  // relocation types occupy eight bit. In case of N64 ABI we extract first
+  // relocation from 3-in-1 packet because only the first relocation can
+  // be against a real symbol.
+  if (Config->EMachine == EM_MIPS && (Type & 0xff) == R_MIPS_GPREL16)
+    return false;
+  return Body.isPreemptible();
+}
+
+// This function is similar to the `handleTlsRelocation`. ARM and MIPS do not
+// support any relaxations for TLS relocations so by factoring out ARM and MIPS
+// handling in to the separate function we can simplify the code and do not
+// pollute `handleTlsRelocation` by ARM and MIPS `ifs` statements.
+// Returns 1 if the relocation was handled here and 0 otherwise.
+template <class ELFT, class GOT>
+static unsigned handleNoRelaxTlsRelocation(
+    GOT *Got, uint32_t Type, SymbolBody &Body, InputSectionBase<ELFT> &C,
+    typename ELFT::uint Offset, typename ELFT::uint Addend, RelExpr Expr) {
+  typedef typename ELFT::uint uintX_t;
+  auto addModuleReloc = [](SymbolBody &Body, GOT *Got, uintX_t Off, bool LD) {
+    // The Dynamic TLS Module Index Relocation can be statically resolved to 1
+    // if we know that we are linking an executable. For ARM we resolve the
+    // relocation when writing the Got. MIPS has a custom Got implementation
+    // that writes the Module index in directly.
+    if (!Body.isPreemptible() && !Config->Pic && Config->EMachine == EM_ARM)
+      Got->Relocations.push_back(
+          {R_ABS, Target->TlsModuleIndexRel, Off, 0, &Body});
+    else {
+      // For the local-dynamic case (LD) the dynamic relocation carries no
+      // symbol.
+      SymbolBody *Dest = LD ? nullptr : &Body;
+      In<ELFT>::RelaDyn->addReloc(
+          {Target->TlsModuleIndexRel, Got, Off, false, Dest, 0});
+    }
+  };
+  if (Expr == R_MIPS_TLSLD || Expr == R_TLSLD_PC) {
+    if (Got->addTlsIndex() && (Config->Pic || Config->EMachine == EM_ARM))
+      addModuleReloc(Body, Got, Got->getTlsIndexOff(), true);
+    C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
+    return 1;
+  }
+  if (Target->isTlsGlobalDynamicRel(Type)) {
+    if (Got->addDynTlsEntry(Body) &&
+        (Body.isPreemptible() || Config->EMachine == EM_ARM)) {
+      uintX_t Off = Got->getGlobalDynOffset(Body);
+      addModuleReloc(Body, Got, Off, false);
+      // The offset relocation targets the word right after the module index.
+      if (Body.isPreemptible())
+        In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Got,
+                                     Off + (uintX_t)sizeof(uintX_t), false,
+                                     &Body, 0});
+    }
+    C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
+    return 1;
+  }
+  return 0;
+}
+
+// Returns the number of relocations processed.
+// A return value of 0 means the relocation was not handled here: the section
+// is not SHF_ALLOC, the symbol is not TLS, or no case below matched.
+template <class ELFT>
+static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body,
+                                    InputSectionBase<ELFT> &C,
+                                    typename ELFT::uint Offset,
+                                    typename ELFT::uint Addend, RelExpr Expr) {
+  if (!(C.Flags & SHF_ALLOC))
+    return 0;
+
+  if (!Body.isTls())
+    return 0;
+
+  typedef typename ELFT::uint uintX_t;
+
+  // ARM and MIPS take the no-relaxation path, each with its own GOT section.
+  if (Config->EMachine == EM_ARM)
+    return handleNoRelaxTlsRelocation<ELFT>(In<ELFT>::Got, Type, Body, C,
+                                            Offset, Addend, Expr);
+  if (Config->EMachine == EM_MIPS)
+    return handleNoRelaxTlsRelocation<ELFT>(In<ELFT>::MipsGot, Type, Body, C,
+                                            Offset, Addend, Expr);
+
+  bool IsPreemptible = isPreemptible(Body, Type);
+  // TLS descriptor relocations in a shared link: create the GOT entry and its
+  // dynamic relocation once per symbol. R_TLSDESC_CALL itself is not recorded.
+  if ((Expr == R_TLSDESC || Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC_CALL) &&
+      Config->Shared) {
+    if (In<ELFT>::Got->addDynTlsEntry(Body)) {
+      uintX_t Off = In<ELFT>::Got->getGlobalDynOffset(Body);
+      In<ELFT>::RelaDyn->addReloc({Target->TlsDescRel, In<ELFT>::Got, Off,
+                                   !IsPreemptible, &Body, 0});
+    }
+    if (Expr != R_TLSDESC_CALL)
+      C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
+    return 1;
+  }
+
+  if (Expr == R_TLSLD_PC || Expr == R_TLSLD) {
+    // Local-Dynamic relocs can be relaxed to Local-Exec.
+    if (!Config->Shared) {
+      C.Relocations.push_back(
+          {R_RELAX_TLS_LD_TO_LE, Type, Offset, Addend, &Body});
+      // Reports two relocations as processed, so the caller also skips the
+      // one that follows.
+      return 2;
+    }
+    if (In<ELFT>::Got->addTlsIndex())
+      In<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, In<ELFT>::Got,
+                                   In<ELFT>::Got->getTlsIndexOff(), false,
+                                   nullptr, 0});
+    C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
+    return 1;
+  }
+
+  // Local-Dynamic relocs can be relaxed to Local-Exec.
+  if (Target->isTlsLocalDynamicRel(Type) && !Config->Shared) {
+    C.Relocations.push_back(
+        {R_RELAX_TLS_LD_TO_LE, Type, Offset, Addend, &Body});
+    return 1;
+  }
+
+  if (Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC || Expr == R_TLSDESC_CALL ||
+      Target->isTlsGlobalDynamicRel(Type)) {
+    if (Config->Shared) {
+      if (In<ELFT>::Got->addDynTlsEntry(Body)) {
+        uintX_t Off = In<ELFT>::Got->getGlobalDynOffset(Body);
+        In<ELFT>::RelaDyn->addReloc(
+            {Target->TlsModuleIndexRel, In<ELFT>::Got, Off, false, &Body, 0});
+
+        // If the symbol is preemptible we need the dynamic linker to write
+        // the offset too.
+        uintX_t OffsetOff = Off + (uintX_t)sizeof(uintX_t);
+        if (IsPreemptible)
+          In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, In<ELFT>::Got,
+                                       OffsetOff, false, &Body, 0});
+        else
+          In<ELFT>::Got->Relocations.push_back(
+              {R_ABS, Target->TlsOffsetRel, OffsetOff, 0, &Body});
+      }
+      C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
+      return 1;
+    }
+
+    // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
+    // depending on the symbol being locally defined or not.
+    if (IsPreemptible) {
+      C.Relocations.push_back(
+          {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_IE), Type,
+           Offset, Addend, &Body});
+      if (!Body.isInGot()) {
+        In<ELFT>::Got->addEntry(Body);
+        In<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, In<ELFT>::Got,
+                                     Body.getGotOffset<ELFT>(), false, &Body,
+                                     0});
+      }
+      return Target->TlsGdRelaxSkip;
+    }
+    C.Relocations.push_back(
+        {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type,
+         Offset, Addend, &Body});
+    return Target->TlsGdRelaxSkip;
+  }
+
+  // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally
+  // defined.
+  if (Target->isTlsInitialExecRel(Type) && !Config->Shared && !IsPreemptible) {
+    C.Relocations.push_back(
+        {R_RELAX_TLS_IE_TO_LE, Type, Offset, Addend, &Body});
+    return 1;
+  }
+  return 0;
+}
+
+// Reads a 32-bit word at Loc and returns its low 16 bits as a signed value.
+template <endianness E> static int16_t readSignedLo16(const uint8_t *Loc) {
+  return read32<E>(Loc) & 0xffff;
+}
+
+// Returns the relocation type that pairs with Rel's type, or R_MIPS_NONE if
+// there is none. R_MIPS_GOT16 is paired only when Sym is local.
+template <class RelTy>
+static uint32_t getMipsPairType(const RelTy *Rel, const SymbolBody &Sym) {
+  switch (Rel->getType(Config->Mips64EL)) {
+  case R_MIPS_HI16:
+    return R_MIPS_LO16;
+  case R_MIPS_GOT16:
+    return Sym.isLocal() ? R_MIPS_LO16 : R_MIPS_NONE;
+  case R_MIPS_PCHI16:
+    return R_MIPS_PCLO16;
+  case R_MICROMIPS_HI16:
+    return R_MICROMIPS_LO16;
+  default:
+    return R_MIPS_NONE;
+  }
+}
+
+// Scans [Rel, End) for the first relocation of the paired type against the
+// same symbol index, and combines the low 16 bits read at BufLoc (shifted up
+// by 16) with the signed low half read at that relocation's offset.
+// Returns 0 for RELA records or unpaired types; warns and returns 0 if no
+// matching relocation is found.
+template <class ELFT, class RelTy>
+static int32_t findMipsPairedAddend(const uint8_t *Buf, const uint8_t *BufLoc,
+                                    SymbolBody &Sym, const RelTy *Rel,
+                                    const RelTy *End) {
+  uint32_t SymIndex = Rel->getSymbol(Config->Mips64EL);
+  uint32_t Type = getMipsPairType(Rel, Sym);
+
+  // Some MIPS relocations use addend calculated from addend of the relocation
+  // itself and addend of paired relocation. ABI requires to compute such
+  // combined addend in case of REL relocation record format only.
+  // See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+  if (RelTy::IsRela || Type == R_MIPS_NONE)
+    return 0;
+
+  for (const RelTy *RI = Rel; RI != End; ++RI) {
+    if (RI->getType(Config->Mips64EL) != Type)
+      continue;
+    if (RI->getSymbol(Config->Mips64EL) != SymIndex)
+      continue;
+    const endianness E = ELFT::TargetEndianness;
+    return ((read32<E>(BufLoc) & 0xffff) << 16) +
+           readSignedLo16<E>(Buf + RI->r_offset);
+  }
+  warn("can't find matching " + toString(Type) + " relocation for " +
+       toString(Rel->getType(Config->Mips64EL)));
+  return 0;
+}
+
+// True if non-preemptable symbol always has the same value regardless of where
+// the DSO is loaded.
+template <class ELFT> static bool isAbsolute(const SymbolBody &Body) {
+  if (Body.isUndefined())
+    return !Body.isLocal() && Body.symbol()->isWeak();
+  if (const auto *DR = dyn_cast<DefinedRegular<ELFT>>(&Body))
+    return DR->Section == nullptr; // Absolute symbol.
+  return false;
+}
+
+// Like isAbsolute(), but also true for TLS symbols.
+template <class ELFT> static bool isAbsoluteValue(const SymbolBody &Body) {
+  return isAbsolute<ELFT>(Body) || Body.isTls();
+}
+
+// True if Expr is one of the PLT-based expression kinds listed here.
+static bool needsPlt(RelExpr Expr) {
+  return isRelExprOneOf<R_PLT_PC, R_PPC_PLT_OPD, R_PLT, R_PLT_PAGE_PC,
+                        R_THUNK_PLT_PC>(Expr);
+}
+
+// True if this expression is of the form Sym - X, where X is a position in the
+// file (PC, or GOT for example).
+static bool isRelExpr(RelExpr Expr) {
+  return isRelExprOneOf<R_PC, R_GOTREL, R_GOTREL_FROM_END, R_MIPS_GOTREL,
+                        R_PAGE_PC, R_RELAX_GOT_PC, R_THUNK_PC, R_THUNK_PLT_PC>(
+      Expr);
+}
+
+// Returns true if a relocation with expression E and type Type against Body
+// can be resolved completely at static link time.
+// For a relative relocation against an absolute symbol that is neither weak
+// undefined nor MIPS _gp_disp, an error is reported at S/RelOff and true is
+// still returned.
+template <class ELFT>
+static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type,
+                                     const SymbolBody &Body,
+                                     InputSectionBase<ELFT> &S,
+                                     typename ELFT::uint RelOff) {
+  // These expressions always compute a constant
+  if (isRelExprOneOf<R_SIZE, R_GOT_FROM_END, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE,
+                     R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_TLSGD,
+                     R_GOT_PAGE_PC, R_GOT_PC, R_PLT_PC, R_TLSGD_PC, R_TLSGD,
+                     R_PPC_PLT_OPD, R_TLSDESC_CALL, R_TLSDESC_PAGE, R_HINT,
+                     R_THUNK_PC, R_THUNK_PLT_PC>(E))
+    return true;
+
+  // These never do, except if the entire file is position dependent or if
+  // only the low bits are used.
+  if (E == R_GOT || E == R_PLT || E == R_TLSDESC)
+    return Target->usesOnlyLowPageBits(Type) || !Config->Pic;
+
+  if (isPreemptible(Body, Type))
+    return false;
+
+  if (!Config->Pic)
+    return true;
+
+  // In PIC output, an absolute value with an absolute expression, or a
+  // position-dependent value with a relative expression, is constant.
+  bool AbsVal = isAbsoluteValue<ELFT>(Body);
+  bool RelE = isRelExpr(E);
+  if (AbsVal && !RelE)
+    return true;
+  if (!AbsVal && RelE)
+    return true;
+
+  // Relative relocation to an absolute value. This is normally unrepresentable,
+  // but if the relocation refers to a weak undefined symbol, we allow it to
+  // resolve to the image base. This is a little strange, but it allows us to
+  // link function calls to such symbols. Normally such a call will be guarded
+  // with a comparison, which will load a zero from the GOT.
+  // Another special case is MIPS _gp_disp symbol which represents offset
+  // between start of a function and '_gp' value and defined as absolute just
+  // to simplify the code.
+  if (AbsVal && RelE) {
+    if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak())
+      return true;
+    if (&Body == ElfSym<ELFT>::MipsGpDisp)
+      return true;
+    error(S.getLocation(RelOff) + ": relocation " + toString(Type) +
+          " cannot refer to absolute symbol '" + toString(Body) +
+          "' defined in " + toString(Body.File));
+    return true;
+  }
+
+  return Target->usesOnlyLowPageBits(Type);
+}
+
+// Maps an expression to its PLT-based counterpart. Expressions without one are
+// returned unchanged.
+static RelExpr toPlt(RelExpr Expr) {
+  if (Expr == R_PPC_OPD)
+    return R_PPC_PLT_OPD;
+  if (Expr == R_PC)
+    return R_PLT_PC;
+  if (Expr == R_PAGE_PC)
+    return R_PLT_PAGE_PC;
+  if (Expr == R_ABS)
+    return R_PLT;
+  return Expr;
+}
+
+static RelExpr fromPlt(RelExpr Expr) {
+  // We decided not to use a plt. Optimize a reference to the plt to a
+  // reference to the symbol itself.
+  if (Expr == R_PLT_PC)
+    return R_PC;
+  if (Expr == R_PPC_PLT_OPD)
+    return R_PPC_OPD;
+  if (Expr == R_PLT)
+    return R_ABS;
+  return Expr;
+}
+
+// Returns the alignment for the copy of SS: the largest power of two that
+// divides both the sh_addralign of the section defining SS and the st_value
+// of SS, capped at 2^31 so that it fits the return type.
+template <class ELFT> static uint32_t getAlignment(SharedSymbol<ELFT> *SS) {
+  typedef typename ELFT::uint uintX_t;
+
+  uintX_t SecAlign = SS->file()->getSection(SS->Sym)->sh_addralign;
+  uintX_t SymValue = SS->Sym.st_value;
+  unsigned TrailingZeros =
+      std::min(countTrailingZeros(SecAlign), countTrailingZeros(SymValue));
+  // countTrailingZeros() returns the full bit width for a zero input, and
+  // sh_addralign may exceed 2^31 for ELF64. Shifting a plain int by 31 or more
+  // is undefined, so do the shift in uint32_t and cap the shift count.
+  const unsigned MaxShift = 31;
+  return uint32_t(1) << std::min(TrailingZeros, MaxShift);
+}
+
+// Reserve space in .bss for copy relocation.
+template <class ELFT> static void addCopyRelSymbol(SharedSymbol<ELFT> *SS) {
+  typedef typename ELFT::uint uintX_t;
+  typedef typename ELFT::Sym Elf_Sym;
+
+  // Copy relocation against zero-sized symbol doesn't make sense.
+  uintX_t SymSize = SS->template getSize<ELFT>();
+  if (SymSize == 0)
+    fatal("cannot create a copy relocation for symbol " + toString(*SS));
+
+  // Place the copy at the next suitably aligned offset at the end of .bss.
+  uintX_t Alignment = getAlignment(SS);
+  uintX_t Off = alignTo(Out<ELFT>::Bss->Size, Alignment);
+  Out<ELFT>::Bss->Size = Off + SymSize;
+  Out<ELFT>::Bss->updateAlignment(Alignment);
+  uintX_t Shndx = SS->Sym.st_shndx;
+  uintX_t Value = SS->Sym.st_value;
+  // Look through the DSO's dynamic symbol table for aliases and create a
+  // dynamic symbol for each one. This causes the copy relocation to correctly
+  // interpose any aliases.
+  for (const Elf_Sym &S : SS->file()->getGlobalSymbols()) {
+    if (S.st_shndx != Shndx || S.st_value != Value)
+      continue;
+    auto *Alias = dyn_cast_or_null<SharedSymbol<ELFT>>(
+        Symtab<ELFT>::X->find(check(S.getName(SS->file()->getStringTable()))));
+    if (!Alias)
+      continue;
+    Alias->OffsetInBss = Off;
+    Alias->NeedsCopyOrPltAddr = true;
+    Alias->symbol()->IsUsedInRegularObj = true;
+  }
+  In<ELFT>::RelaDyn->addReloc(
+      {Target->CopyRel, Out<ELFT>::Bss, SS->OffsetInBss, false, SS, 0});
+}
+
+// Adjusts Expr for the kind of symbol it refers to (ifunc, non-preemptible,
+// thunk) and returns the result. If IsWrite is false and the adjusted
+// expression is not a link-time constant, this either reports an error,
+// creates a copy relocation (object symbols), or switches to a PLT expression
+// (function symbols).
+template <class ELFT>
+static RelExpr adjustExpr(const elf::ObjectFile<ELFT> &File, SymbolBody &Body,
+                          bool IsWrite, RelExpr Expr, uint32_t Type,
+                          const uint8_t *Data, InputSectionBase<ELFT> &S,
+                          typename ELFT::uint RelOff) {
+  bool Preemptible = isPreemptible(Body, Type);
+  if (Body.isGnuIFunc()) {
+    Expr = toPlt(Expr);
+  } else if (!Preemptible) {
+    if (needsPlt(Expr))
+      Expr = fromPlt(Expr);
+    if (Expr == R_GOT_PC && !isAbsoluteValue<ELFT>(Body))
+      Expr = Target->adjustRelaxExpr(Type, Data, Expr);
+  }
+  Expr = Target->getThunkExpr(Expr, Type, File, Body);
+
+  if (IsWrite || isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, S, RelOff))
+    return Expr;
+
+  // This relocation would require the dynamic linker to write a value to read
+  // only memory. We can hack around it if we are producing an executable and
+  // the referred symbol can be preempted to refer to the executable.
+  if (Config->Shared || (Config->Pic && !isRelExpr(Expr))) {
+    error(S.getLocation(RelOff) + ": can't create dynamic relocation " +
+          toString(Type) + " against " +
+          (Body.getName().empty() ? "local symbol in readonly segment"
+                                  : "symbol '" + toString(Body) + "'") +
+          " defined in " + toString(Body.File));
+    return Expr;
+  }
+  if (Body.getVisibility() != STV_DEFAULT) {
+    error(S.getLocation(RelOff) + ": cannot preempt symbol '" + toString(Body) +
+          "' defined in " + toString(Body.File));
+    return Expr;
+  }
+  if (Body.isObject()) {
+    // Produce a copy relocation.
+    auto *B = cast<SharedSymbol<ELFT>>(&Body);
+    if (!B->needsCopy())
+      addCopyRelSymbol(B);
+    return Expr;
+  }
+  if (Body.isFunc()) {
+    // This handles a non PIC program call to function in a shared library. In
+    // an ideal world, we could just report an error saying the relocation can
+    // overflow at runtime. In the real world with glibc, crt1.o has a
+    // R_X86_64_PC32 pointing to libc.so.
+    //
+    // The general idea on how to handle such cases is to create a PLT entry and
+    // use that as the function value.
+    //
+    // For the static linking part, we just return a plt expr and everything
+    // else will use the PLT entry as the address.
+    //
+    // The remaining problem is making sure pointer equality still works. We
+    // need the help of the dynamic linker for that. We let it know that we have
+    // a direct reference to a so symbol by creating an undefined symbol with a
+    // non zero st_value. Seeing that, the dynamic linker resolves the symbol to
+    // the value of the symbol we created. This is true even for got entries, so
+    // pointer equality is maintained. To avoid an infinite loop, the only entry
+    // that points to the real function is a dedicated got entry used by the
+    // plt. That is identified by special relocation types (R_X86_64_JUMP_SLOT,
+    // R_386_JMP_SLOT, etc).
+    Body.NeedsCopyOrPltAddr = true;
+    return toPlt(Expr);
+  }
+  error("symbol '" + toString(Body) + "' defined in " + toString(Body.File) +
+        " is missing type");
+
+  return Expr;
+}
+
+// Computes the addend for relocation RI: the explicit addend, plus the
+// implicit addend read from the section data for REL records, plus the MIPS
+// and PPC64 specific adjustments below.
+template <class ELFT, class RelTy>
+static typename ELFT::uint computeAddend(const elf::ObjectFile<ELFT> &File,
+                                         const uint8_t *SectionData,
+                                         const RelTy *End, const RelTy &RI,
+                                         RelExpr Expr, SymbolBody &Body) {
+  typedef typename ELFT::uint uintX_t;
+
+  uint32_t Type = RI.getType(Config->Mips64EL);
+  uintX_t Addend = getAddend<ELFT>(RI);
+  const uint8_t *BufLoc = SectionData + RI.r_offset;
+  if (!RelTy::IsRela)
+    Addend += Target->getImplicitAddend(BufLoc, Type);
+  if (Config->EMachine == EM_MIPS) {
+    Addend += findMipsPairedAddend<ELFT>(SectionData, BufLoc, Body, &RI, End);
+    if (Type == R_MIPS_LO16 && Expr == R_PC)
+      // R_MIPS_LO16 expression has R_PC type iff the target is _gp_disp
+      // symbol. In that case we should use the following formula for
+      // calculation "AHL + GP - P + 4". Let's add 4 right here.
+      // For details see p. 4-19 at
+      // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+      Addend += 4;
+    if (Expr == R_MIPS_GOTREL && Body.isLocal())
+      Addend += File.MipsGp0;
+  }
+  if (Config->Pic && Config->EMachine == EM_PPC64 && Type == R_PPC64_TOC)
+    Addend += getPPC64TocBase();
+  return Addend;
+}
+
+// Reports Sym as undefined at Offset in S, following
+// Config->UnresolvedSymbols: nothing for Ignore, a warning for Warn, an error
+// otherwise. In a shared link, default-visibility symbols are not reported
+// unless the policy is NoUndef.
+template <class ELFT>
+static void reportUndefined(SymbolBody &Sym, InputSectionBase<ELFT> &S,
+                            typename ELFT::uint Offset) {
+  if (Config->UnresolvedSymbols == UnresolvedPolicy::Ignore)
+    return;
+
+  if (Config->Shared && Sym.symbol()->Visibility == STV_DEFAULT &&
+      Config->UnresolvedSymbols != UnresolvedPolicy::NoUndef)
+    return;
+
+  std::string Msg =
+      S.getLocation(Offset) + ": undefined symbol '" + toString(Sym) + "'";
+
+  if (Config->UnresolvedSymbols == UnresolvedPolicy::Warn)
+    warn(Msg);
+  else
+    error(Msg);
+}
+
+// Returns the merged type and the number of following relocation records
+// that were folded into it.
+template <class RelTy>
+static std::pair<uint32_t, uint32_t>
+mergeMipsN32RelTypes(uint32_t Type, uint32_t Offset, RelTy *I, RelTy *E) {
+  // MIPS N32 ABI treats series of successive relocations with the same offset
+  // as a single relocation. The similar approach used by N64 ABI, but this ABI
+  // packs all relocations into the single relocation record. Here we emulate
+  // this for the N32 ABI. Iterate over relocation with the same offset and put
+  // their types into the single bit-set.
+  // NOTE(review): the shift count is 8 * Processed, so this assumes at most
+  // three follow-up records share the offset - confirm for malformed input.
+  uint32_t Processed = 0;
+  for (; I != E && Offset == I->r_offset; ++I) {
+    ++Processed;
+    Type |= I->getType(Config->Mips64EL) << (8 * Processed);
+  }
+  return std::make_pair(Type, Processed);
+}
+
+// The reason we have to do this early scan is as follows
+// * To mmap the output file, we need to know the size
+// * For that, we need to know how many dynamic relocs we will have.
+// It might be possible to avoid this by outputting the file with write:
+// * Write the allocated output sections, computing addresses.
+// * Apply relocations, recording which ones require a dynamic reloc.
+// * Write the dynamic relocations.
+// * Write the rest of the file.
// This would have some drawbacks. For example, we would only know if .rela.dyn
// is needed after applying relocations. If it is, it will go after rw and rx
// sections. Given that it is ro, we will need an extra PT_LOAD. This
// complicates things for the dynamic linker and means we would have to reserve
// space for the extra PT_LOAD even if we end up not using it.
template <class ELFT, class RelTy>
static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) {
  typedef typename ELFT::uint uintX_t;

  // True when the section being scanned has the SHF_WRITE flag set.
  bool IsWrite = C.Flags & SHF_WRITE;

  // Shorthand for adding a dynamic relocation to In<ELFT>::RelaDyn.
  auto AddDyn = [=](const DynamicReloc<ELFT> &Reloc) {
    In<ELFT>::RelaDyn->addReloc(Reloc);
  };

  const elf::ObjectFile<ELFT> *File = C.getFile();
  ArrayRef<uint8_t> SectionData = C.Data;
  const uint8_t *Buf = SectionData.begin();

  // Only EhInputSection carries pieces; for every other section kind the
  // piece range stays empty and relocation offsets are used unchanged.
  ArrayRef<EhSectionPiece> Pieces;
  if (auto *Eh = dyn_cast<EhInputSection<ELFT>>(&C))
    Pieces = Eh->Pieces;

  // PieceI is only ever advanced in the loop below.
  // NOTE(review): this forward-only walk presumably relies on Rels being
  // ordered by r_offset -- confirm.
  ArrayRef<EhSectionPiece>::iterator PieceI = Pieces.begin();
  ArrayRef<EhSectionPiece>::iterator PieceE = Pieces.end();

  for (auto I = Rels.begin(), E = Rels.end(); I != E; ++I) {
    const RelTy &RI = *I;
    SymbolBody &Body = File->getRelocTargetSym(RI);
    uint32_t Type = RI.getType(Config->Mips64EL);

    // For the MIPS N32 ABI the helper may fold following relocation records
    // into Type; skip over the number of records it reports.
    if (Config->MipsN32Abi) {
      uint32_t Processed;
      std::tie(Type, Processed) =
          mergeMipsN32RelTypes(Type, RI.r_offset, I + 1, E);
      I += Processed;
    }

    // We only report undefined symbols if they are referenced somewhere in the
    // code.
    if (!Body.isLocal() && Body.isUndefined() && !Body.symbol()->isWeak())
      reportUndefined(Body, C, RI.r_offset);

    RelExpr Expr = Target->getRelExpr(Type, Body);
    // Preemptibility is sampled before adjustExpr() runs and is reused by the
    // PLT/GOT handling further down.
    bool Preemptible = isPreemptible(Body, Type);
    Expr = adjustExpr(*File, Body, IsWrite, Expr, Type, Buf + RI.r_offset, C,
                      RI.r_offset);
    if (ErrorCount)
      continue;

    // Skip a relocation that points to a dead piece
    // in an eh_frame section.
    while (PieceI != PieceE &&
           (PieceI->InputOff + PieceI->size() <= RI.r_offset))
      ++PieceI;

    // Compute the offset of this section in the output section. We do it here
    // to try to compute it only once.
    uintX_t Offset;
    if (PieceI != PieceE) {
      assert(PieceI->InputOff <= RI.r_offset && "Relocation not in any piece");
      // An OutputOff of -1 marks the piece this relocation falls in as dead.
      if (PieceI->OutputOff == -1)
        continue;
      Offset = PieceI->OutputOff + RI.r_offset - PieceI->InputOff;
    } else {
      Offset = RI.r_offset;
    }

    // This relocation does not require a got entry, but it is relative to got
    // and needs it to be created. Here we request that.
    if (Expr == R_GOTONLY_PC || Expr == R_GOTONLY_PC_FROM_END ||
        Expr == R_GOTREL || Expr == R_GOTREL_FROM_END || Expr == R_PPC_TOC)
      In<ELFT>::Got->HasGotOffRel = true;

    uintX_t Addend = computeAddend(*File, Buf, E, RI, Expr, Body);

    // A non-zero result is the number of relocation records the TLS handler
    // dealt with, including this one; the loop's ++I accounts for one of them.
    if (unsigned Processed =
            handleTlsRelocation<ELFT>(Type, Body, C, Offset, Addend, Expr)) {
      I += (Processed - 1);
      continue;
    }

    // Ignore "hint" and TLS Descriptor call relocation because they are
    // only markers for relaxation.
    if (isRelExprOneOf<R_HINT, R_TLSDESC_CALL>(Expr))
      continue;

    if (needsPlt(Expr) ||
        isRelExprOneOf<R_THUNK_ABS, R_THUNK_PC, R_THUNK_PLT_PC>(Expr) ||
        refersToGotEntry(Expr) || !isPreemptible(Body, Type)) {
      // If the relocation points to something in the file, we can process it.
      bool Constant =
          isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, C, RI.r_offset);

      // If the output being produced is position independent, the final value
      // is still not known. In that case we still need some help from the
      // dynamic linker. We can however do better than just copying the incoming
      // relocation. We can process some of it and just ask the dynamic
      // linker to add the load address.
      if (!Constant)
        AddDyn({Target->RelativeRel, &C, Offset, true, &Body, Addend});

      // If the produced value is a constant, we just remember to write it
      // when outputting this section. We also have to do it if the format
      // uses Elf_Rel, since in that case the written value is the addend.
      if (Constant || !RelTy::IsRela)
        C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
    } else {
      // We don't know anything about the final symbol. Just ask the dynamic
      // linker to handle the relocation for us.
      if (!Target->isPicRel(Type))
        error(C.getLocation(Offset) + ": relocation " + toString(Type) +
              " cannot be used against shared object; recompile with -fPIC.");
      AddDyn({Target->getDynRel(Type), &C, Offset, false, &Body, Addend});

      // MIPS ABI turns using of GOT and dynamic relocations inside out.
      // While regular ABI uses dynamic relocations to fill up GOT entries
      // MIPS ABI requires dynamic linker to fill up GOT entries using
      // specially sorted dynamic symbol table. This affects even dynamic
      // relocations against symbols which do not require GOT entries
      // creation explicitly, i.e. do not have any GOT-relocations. So if
      // a preemptible symbol has a dynamic relocation we anyway have
      // to create a GOT entry for it.
      // If a non-preemptible symbol has a dynamic relocation against it,
      // dynamic linker takes its st_value, adds offset and writes down
      // result of the dynamic relocation. In case of preemptible symbol
      // dynamic linker performs symbol resolution, writes the symbol value
      // to the GOT entry and reads the GOT entry when it needs to perform
      // a dynamic relocation.
      // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf p.4-19
      if (Config->EMachine == EM_MIPS)
        In<ELFT>::MipsGot->addEntry(Body, Addend, Expr);
      continue;
    }

    // At this point we are done with the relocated position. Some relocations
    // also require us to create a got or plt entry.

    // If a relocation needs PLT, we create a PLT and a GOT slot for the symbol.
    if (needsPlt(Expr)) {
      if (Body.isInPlt())
        continue;

      // Non-preemptible GNU ifunc symbols go to the separate Iplt/IgotPlt/
      // RelaIplt sections; everything else uses Plt/GotPlt/RelaPlt.
      if (Body.isGnuIFunc() && !Preemptible) {
        In<ELFT>::Iplt->addEntry(Body);
        In<ELFT>::IgotPlt->addEntry(Body);
        In<ELFT>::RelaIplt->addReloc({Target->IRelativeRel, In<ELFT>::IgotPlt,
                                      Body.getGotPltOffset<ELFT>(),
                                      !Preemptible, &Body, 0});
      } else {
        In<ELFT>::Plt->addEntry(Body);
        In<ELFT>::GotPlt->addEntry(Body);
        In<ELFT>::RelaPlt->addReloc({Target->PltRel, In<ELFT>::GotPlt,
                                     Body.getGotPltOffset<ELFT>(), !Preemptible,
                                     &Body, 0});
      }
      continue;
    }

    if (refersToGotEntry(Expr)) {
      if (Config->EMachine == EM_MIPS) {
        // MIPS ABI has special rules to process GOT entries and doesn't
        // require relocation entries for them. A special case is TLS
        // relocations. In that case dynamic loader applies dynamic
        // relocations to initialize TLS GOT entries.
        // See "Global Offset Table" in Chapter 5 in the following document
        // for detailed description:
        // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
        In<ELFT>::MipsGot->addEntry(Body, Addend, Expr);
        if (Body.isTls() && Body.isPreemptible())
          AddDyn({Target->TlsGotRel, In<ELFT>::MipsGot,
                  Body.getGotOffset<ELFT>(), false, &Body, 0});
        continue;
      }

      // The symbol already has a GOT slot; nothing more to create.
      if (Body.isInGot())
        continue;

      In<ELFT>::Got->addEntry(Body);
      uintX_t Off = Body.getGotOffset<ELFT>();
      // Pick the dynamic relocation type (and the expression used when the
      // slot is written statically) for the new GOT entry.
      uint32_t DynType;
      RelExpr GotRE = R_ABS;
      if (Body.isTls()) {
        DynType = Target->TlsGotRel;
        GotRE = R_TLS;
      } else if (!Preemptible && Config->Pic && !isAbsolute<ELFT>(Body))
        DynType = Target->RelativeRel;
      else
        DynType = Target->GotRel;

      // FIXME: this logic is almost duplicated above.
      bool Constant = !Preemptible && !(Config->Pic && !isAbsolute<ELFT>(Body));
      if (!Constant)
        AddDyn({DynType, In<ELFT>::Got, Off, !Preemptible, &Body, 0});
      if (Constant || (!RelTy::IsRela && !Preemptible))
        In<ELFT>::Got->Relocations.push_back({GotRE, DynType, Off, 0, &Body});
      continue;
    }
  }
}

// Scans the relocations of S, using the Rela or the Rel record view depending
// on S.AreRelocsRela.
template <class ELFT> void scanRelocations(InputSectionBase<ELFT> &S) {
  if (S.AreRelocsRela)
    scanRelocs(S, S.relas());
  else
    scanRelocs(S, S.rels());
}

// Walks Rels and calls addThunk() for every relocation whose expression, after
// Target->getThunkExpr(), is one of the R_THUNK_* kinds.
template <class ELFT, class RelTy>
static void createThunks(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) {
  const elf::ObjectFile<ELFT> *File = C.getFile();
  for (const RelTy &Rel : Rels) {
    SymbolBody &Body = File->getRelocTargetSym(Rel);
    uint32_t Type = Rel.getType(Config->Mips64EL);
    RelExpr Expr = Target->getRelExpr(Type, Body);
    // A PLT expression against a non-preemptible symbol is first converted
    // with fromPlt() before the target is asked about thunks.
    if (!isPreemptible(Body, Type) && needsPlt(Expr))
      Expr = fromPlt(Expr);
    Expr = Target->getThunkExpr(Expr, Type, *File, Body);
    // Some targets might require creation of thunks for relocations.
    // Now we support only MIPS which requires LA25 thunk to call PIC
    // code from non-PIC one, and ARM which requires interworking.
    if (Expr == R_THUNK_ABS || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC) {
      auto *Sec = cast<InputSection<ELFT>>(&C);
      addThunk<ELFT>(Type, Body, *Sec);
    }
  }
}

// Creates thunks for S, using the Rela or the Rel record view depending on
// S.AreRelocsRela.
template <class ELFT> void createThunks(InputSectionBase<ELFT> &S) {
  if (S.AreRelocsRela)
    createThunks(S, S.relas());
  else
    createThunks(S, S.rels());
}

// Explicit instantiations for the four ELF32/ELF64, little/big-endian variants.
template void scanRelocations<ELF32LE>(InputSectionBase<ELF32LE> &);
template void scanRelocations<ELF32BE>(InputSectionBase<ELF32BE> &);
template void scanRelocations<ELF64LE>(InputSectionBase<ELF64LE> &);
template void scanRelocations<ELF64BE>(InputSectionBase<ELF64BE> &);

template void createThunks<ELF32LE>(InputSectionBase<ELF32LE> &);
template void createThunks<ELF32BE>(InputSectionBase<ELF32BE> &);
template void createThunks<ELF64LE>(InputSectionBase<ELF64LE> &);
template void createThunks<ELF64BE>(InputSectionBase<ELF64BE> &);
}
}
diff --git a/contrib/llvm/tools/lld/ELF/Relocations.h b/contrib/llvm/tools/lld/ELF/Relocations.h new file mode 100644 index 000000000000..b5825bdd5e59 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Relocations.h @@ -0,0 +1,130 @@
//===- Relocations.h -------------------------------------------*- C++ -*-===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_RELOCATIONS_H
#define LLD_ELF_RELOCATIONS_H

#include "lld/Core/LLVM.h"

namespace lld {
namespace elf {
class SymbolBody;
class InputSectionData;
template <class ELFT> class InputSection;
template <class ELFT> class InputSectionBase;

// List of target-independent relocation types. Relocations read
// from files are converted to these types so that the main code
// doesn't have to know about architecture-specific details.
enum RelExpr {
  R_ABS,
  R_GOT,
  R_GOTONLY_PC,
  R_GOTONLY_PC_FROM_END,
  R_GOTREL,
  R_GOTREL_FROM_END,
  R_GOT_FROM_END,
  R_GOT_OFF,
  R_GOT_PAGE_PC,
  R_GOT_PC,
  R_HINT,
  R_MIPS_GOT_LOCAL_PAGE,
  R_MIPS_GOT_OFF,
  R_MIPS_GOT_OFF32,
  R_MIPS_GOTREL,
  R_MIPS_TLSGD,
  R_MIPS_TLSLD,
  R_NEG_TLS,
  R_PAGE_PC,
  R_PC,
  R_PLT,
  R_PLT_PC,
  R_PLT_PAGE_PC,
  R_PPC_OPD,
  R_PPC_PLT_OPD,
  R_PPC_TOC,
  R_RELAX_GOT_PC,
  R_RELAX_GOT_PC_NOPIC,
  R_RELAX_TLS_GD_TO_IE,
  R_RELAX_TLS_GD_TO_IE_END,
  R_RELAX_TLS_GD_TO_IE_ABS,
  R_RELAX_TLS_GD_TO_IE_PAGE_PC,
  R_RELAX_TLS_GD_TO_LE,
  R_RELAX_TLS_GD_TO_LE_NEG,
  R_RELAX_TLS_IE_TO_LE,
  R_RELAX_TLS_LD_TO_LE,
  R_SIZE,
  R_THUNK_ABS,
  R_THUNK_PC,
  R_THUNK_PLT_PC,
  R_TLS,
  R_TLSDESC,
  R_TLSDESC_PAGE,
  R_TLSDESC_CALL,
  R_TLSGD,
  R_TLSGD_PC,
  R_TLSLD,
  R_TLSLD_PC,
};

// Build a bitmask with one bit set for each RelExpr.
//
// Constexpr function arguments can't be used in static asserts, so we
// use template arguments to build the mask.
// But function template partial specializations don't exist (needed
// for base case of the recursion), so we need a dummy struct.
//
// This primary template is the base case: an empty list yields an empty mask.
template <RelExpr... Exprs> struct RelExprMaskBuilder {
  static inline uint64_t build() { return 0; }
};

// Specialization for recursive case: sets the bit for Head and ORs in the mask
// built from the remaining enumerators.
template <RelExpr Head, RelExpr... Tail>
struct RelExprMaskBuilder<Head, Tail...> {
  static inline uint64_t build() {
    static_assert(0 <= Head && Head < 64,
                  "RelExpr is too large for 64-bit mask!");
    return (uint64_t(1) << Head) | RelExprMaskBuilder<Tail...>::build();
  }
};

// Return true if `Expr` is one of `Exprs`.
// There are fewer than 64 RelExpr's, so we can represent any set of
// RelExpr's as a constant bit mask and test for membership with a
// couple cheap bitwise operations.
template <RelExpr... Exprs> bool isRelExprOneOf(RelExpr Expr) {
  assert(0 <= Expr && (int)Expr < 64 && "RelExpr is too large for 64-bit mask!");
  return (uint64_t(1) << Expr) & RelExprMaskBuilder<Exprs...>::build();
}

// Architecture-neutral representation of relocation.
struct Relocation {
  RelExpr Expr;
  uint32_t Type;
  uint64_t Offset;
  uint64_t Addend;
  SymbolBody *Sym;
};

template <class ELFT> void scanRelocations(InputSectionBase<ELFT> &);

template <class ELFT> void createThunks(InputSectionBase<ELFT> &);

// Overload for Elf_Rel records: always returns 0; nothing is read from Rel.
template <class ELFT>
static inline typename ELFT::uint getAddend(const typename ELFT::Rel &Rel) {
  return 0;
}

// Overload for Elf_Rela records: returns the record's r_addend field.
template <class ELFT>
static inline typename ELFT::uint getAddend(const typename ELFT::Rela &Rel) {
  return Rel.r_addend;
}
}
}

#endif
diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.cpp b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp new file mode 100644 index 000000000000..c740685a15a1 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp @@ -0,0 +1,200 @@
//===- ScriptParser.cpp ---------------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the base parser class for linker script and dynamic
// list.
//
//===----------------------------------------------------------------------===//

#include "ScriptParser.h"
#include "Error.h"
#include "llvm/ADT/Twine.h"

using namespace llvm;
using namespace lld;
using namespace lld::elf;

// Returns a whole line containing the current token.
+StringRef ScriptParserBase::getLine() { + StringRef S = getCurrentMB().getBuffer(); + StringRef Tok = Tokens[Pos - 1]; + + size_t Pos = S.rfind('\n', Tok.data() - S.data()); + if (Pos != StringRef::npos) + S = S.substr(Pos + 1); + return S.substr(0, S.find_first_of("\r\n")); +} + +// Returns 1-based line number of the current token. +size_t ScriptParserBase::getLineNumber() { + StringRef S = getCurrentMB().getBuffer(); + StringRef Tok = Tokens[Pos - 1]; + return S.substr(0, Tok.data() - S.data()).count('\n') + 1; +} + +// Returns 0-based column number of the current token. +size_t ScriptParserBase::getColumnNumber() { + StringRef Tok = Tokens[Pos - 1]; + return Tok.data() - getLine().data(); +} + +std::string ScriptParserBase::getCurrentLocation() { + std::string Filename = getCurrentMB().getBufferIdentifier(); + if (!Pos) + return Filename; + return (Filename + ":" + Twine(getLineNumber())).str(); +} + +ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) { tokenize(MB); } + +// We don't want to record cascading errors. Keep only the first one. +void ScriptParserBase::setError(const Twine &Msg) { + if (Error) + return; + Error = true; + + if (!Pos) { + error(getCurrentLocation() + ": " + Msg); + return; + } + + std::string S = getCurrentLocation() + ": "; + error(S + Msg); + error(S + getLine()); + error(S + std::string(getColumnNumber(), ' ') + "^"); +} + +// Split S into linker script tokens. +void ScriptParserBase::tokenize(MemoryBufferRef MB) { + std::vector<StringRef> Vec; + MBs.push_back(MB); + StringRef S = MB.getBuffer(); + StringRef Begin = S; + + for (;;) { + S = skipSpace(S); + if (S.empty()) + break; + + // Quoted token. Note that double-quote characters are parts of a token + // because, in a glob match context, only unquoted tokens are interpreted + // as glob patterns. Double-quoted tokens are literal patterns in that + // context. 
+ if (S.startswith("\"")) { + size_t E = S.find("\"", 1); + if (E == StringRef::npos) { + StringRef Filename = MB.getBufferIdentifier(); + size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n'); + error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote"); + return; + } + + Vec.push_back(S.take_front(E + 1)); + S = S.substr(E + 1); + continue; + } + + // Unquoted token. This is more relaxed than tokens in C-like language, + // so that you can write "file-name.cpp" as one bare token, for example. + size_t Pos = S.find_first_not_of( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$/\\~=+[]*?-:!<>^"); + + // A character that cannot start a word (which is usually a + // punctuation) forms a single character token. + if (Pos == 0) + Pos = 1; + Vec.push_back(S.substr(0, Pos)); + S = S.substr(Pos); + } + + Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end()); +} + +// Skip leading whitespace characters or comments. +StringRef ScriptParserBase::skipSpace(StringRef S) { + for (;;) { + if (S.startswith("/*")) { + size_t E = S.find("*/", 2); + if (E == StringRef::npos) { + error("unclosed comment in a linker script"); + return ""; + } + S = S.substr(E + 2); + continue; + } + if (S.startswith("#")) { + size_t E = S.find('\n', 1); + if (E == StringRef::npos) + E = S.size() - 1; + S = S.substr(E + 1); + continue; + } + size_t Size = S.size(); + S = S.ltrim(); + if (S.size() == Size) + return S; + } +} + +// An erroneous token is handled as if it were the last token before EOF. 
+bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; } + +StringRef ScriptParserBase::next() { + if (Error) + return ""; + if (atEOF()) { + setError("unexpected EOF"); + return ""; + } + return Tokens[Pos++]; +} + +StringRef ScriptParserBase::peek() { + StringRef Tok = next(); + if (Error) + return ""; + --Pos; + return Tok; +} + +bool ScriptParserBase::consume(StringRef Tok) { + if (peek() == Tok) { + skip(); + return true; + } + return false; +} + +void ScriptParserBase::skip() { (void)next(); } + +void ScriptParserBase::expect(StringRef Expect) { + if (Error) + return; + StringRef Tok = next(); + if (Tok != Expect) + setError(Expect + " expected, but got " + Tok); +} + +// Returns true if S encloses T. +static bool encloses(StringRef S, StringRef T) { + return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end(); +} + +MemoryBufferRef ScriptParserBase::getCurrentMB() { + // Find input buffer containing the current token. + assert(!MBs.empty()); + if (!Pos) + return MBs[0]; + + for (MemoryBufferRef MB : MBs) + if (encloses(MB.getBuffer(), Tokens[Pos - 1])) + return MB; + llvm_unreachable("getCurrentMB: failed to find a token"); +} diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.h b/contrib/llvm/tools/lld/ELF/ScriptParser.h new file mode 100644 index 000000000000..264c49792337 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/ScriptParser.h @@ -0,0 +1,53 @@ +//===- ScriptParser.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SCRIPT_PARSER_H +#define LLD_ELF_SCRIPT_PARSER_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include <utility> +#include <vector> + +namespace lld { +namespace elf { + +class ScriptParserBase { +public: + explicit ScriptParserBase(MemoryBufferRef MB); + + void setError(const Twine &Msg); + void tokenize(MemoryBufferRef MB); + static StringRef skipSpace(StringRef S); + bool atEOF(); + StringRef next(); + StringRef peek(); + void skip(); + bool consume(StringRef Tok); + void expect(StringRef Expect); + std::string getCurrentLocation(); + + std::vector<MemoryBufferRef> MBs; + std::vector<StringRef> Tokens; + size_t Pos = 0; + bool Error = false; + +private: + StringRef getLine(); + size_t getLineNumber(); + size_t getColumnNumber(); + + MemoryBufferRef getCurrentMB(); +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Strings.cpp b/contrib/llvm/tools/lld/ELF/Strings.cpp new file mode 100644 index 000000000000..ec3d1f1b2b51 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Strings.cpp @@ -0,0 +1,108 @@ +//===- Strings.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Strings.h" +#include "Config.h" +#include "Error.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Demangle/Demangle.h" +#include <algorithm> +#include <cstring> + +using namespace llvm; +using namespace lld; +using namespace lld::elf; + +StringMatcher::StringMatcher(ArrayRef<StringRef> Pat) { + for (StringRef S : Pat) { + Expected<GlobPattern> Pat = GlobPattern::create(S); + if (!Pat) + error(toString(Pat.takeError())); + else + Patterns.push_back(*Pat); + } +} + +bool StringMatcher::match(StringRef S) const { + for (const GlobPattern &Pat : Patterns) + if (Pat.match(S)) + return true; + return false; +} + +// If an input string is in the form of "foo.N" where N is a number, +// return N. Otherwise, returns 65536, which is one greater than the +// lowest priority. +int elf::getPriority(StringRef S) { + size_t Pos = S.rfind('.'); + if (Pos == StringRef::npos) + return 65536; + int V; + if (S.substr(Pos + 1).getAsInteger(10, V)) + return 65536; + return V; +} + +bool elf::hasWildcard(StringRef S) { + return S.find_first_of("?*[") != StringRef::npos; +} + +StringRef elf::unquote(StringRef S) { + if (!S.startswith("\"")) + return S; + return S.substr(1, S.size() - 2); +} + +// Converts a hex string (e.g. "deadbeef") to a vector. +std::vector<uint8_t> elf::parseHex(StringRef S) { + std::vector<uint8_t> Hex; + while (!S.empty()) { + StringRef B = S.substr(0, 2); + S = S.substr(2); + uint8_t H; + if (B.getAsInteger(16, H)) { + error("not a hexadecimal value: " + B); + return {}; + } + Hex.push_back(H); + } + return Hex; +} + +static bool isAlpha(char C) { + return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_'; +} + +static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); } + +// Returns true if S is valid as a C language identifier. 
+bool elf::isValidCIdentifier(StringRef S) { + return !S.empty() && isAlpha(S[0]) && + std::all_of(S.begin() + 1, S.end(), isAlnum); +} + +// Returns the demangled C++ symbol name for Name. +Optional<std::string> elf::demangle(StringRef Name) { + // __cxa_demangle can be used to demangle strings other than symbol + // names which do not necessarily start with "_Z". Name can be + // either a C or C++ symbol. Don't call __cxa_demangle if the name + // does not look like a C++ symbol name to avoid getting unexpected + // result for a C symbol that happens to match a mangled type name. + if (!Name.startswith("_Z")) + return None; + + char *Buf = itaniumDemangle(Name.str().c_str(), nullptr, nullptr, nullptr); + if (!Buf) + return None; + std::string S(Buf); + free(Buf); + return S; +} diff --git a/contrib/llvm/tools/lld/ELF/Strings.h b/contrib/llvm/tools/lld/ELF/Strings.h new file mode 100644 index 000000000000..934b6427105f --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Strings.h @@ -0,0 +1,82 @@ +//===- Strings.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_STRINGS_H +#define LLD_ELF_STRINGS_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/GlobPattern.h" +#include <vector> + +namespace lld { +namespace elf { + +int getPriority(StringRef S); +bool hasWildcard(StringRef S); +std::vector<uint8_t> parseHex(StringRef S); +bool isValidCIdentifier(StringRef S); +StringRef unquote(StringRef S); + +// This is a lazy version of StringRef. String size is computed lazily +// when it is needed. 
// It is more efficient than StringRef to instantiate
// if you have a string whose size is unknown.
//
// ELF string tables contain a lot of null-terminated strings.
// Most of them are not necessary for the linker because they are names
// of local symbols and the linker doesn't use local symbol names for
// name resolution. So, we use this class to represent strings read
// from string tables.
class StringRefZ {
public:
  StringRefZ() : Start(nullptr), Size(0) {}
  StringRefZ(const char *S, size_t Size) : Start(S), Size(Size) {}

  // Size is initialized to (size_t)-1, which marks the length as not yet
  // computed; the conversion operator below fills it in on first use.
  /*implicit*/ StringRefZ(const char *S) : Start(S), Size(-1) {}

  /*implicit*/ StringRefZ(llvm::StringRef S)
      : Start(S.data()), Size(S.size()) {}

  // Converts to a StringRef, running strlen() first if the length has not
  // been computed yet and caching the result in Size.
  operator llvm::StringRef() const {
    if (Size == (size_t)-1)
      Size = strlen(Start);
    return {Start, Size};
  }

private:
  const char *Start;
  // mutable so that the const conversion operator can cache the length.
  mutable size_t Size;
};

// This class represents multiple glob patterns.
class StringMatcher {
public:
  StringMatcher() = default;
  explicit StringMatcher(ArrayRef<StringRef> Pat);

  bool match(StringRef S) const;

private:
  std::vector<llvm::GlobPattern> Patterns;
};

// Returns a demangled C++ symbol name. If Name is not a mangled
// name, it returns Optional::None.
llvm::Optional<std::string> demangle(StringRef Name);

// Reinterprets a byte array as a StringRef over the same memory (no copy).
inline StringRef toStringRef(ArrayRef<uint8_t> Arr) {
  return {(const char *)Arr.data(), Arr.size()};
}
}
}

#endif
diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp new file mode 100644 index 000000000000..f08fa6229c1a --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp @@ -0,0 +1,710 @@
//===- SymbolTable.cpp ----------------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// Symbol table is a bag of all known symbols. We put all symbols of +// all input files to the symbol table. The symbol table is basically +// a hash table with the logic to resolve symbol name conflicts using +// the symbol types. +// +//===----------------------------------------------------------------------===// + +#include "SymbolTable.h" +#include "Config.h" +#include "Error.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "Symbols.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elf; + +// All input object files must be for the same architecture +// (e.g. it does not make sense to link x86 object files with +// MIPS object files.) This function checks for that error. +template <class ELFT> static bool isCompatible(InputFile *F) { + if (!isa<ELFFileBase<ELFT>>(F) && !isa<BitcodeFile>(F)) + return true; + + if (F->EKind == Config->EKind && F->EMachine == Config->EMachine) { + if (Config->EMachine != EM_MIPS) + return true; + if (isMipsN32Abi(F) == Config->MipsN32Abi) + return true; + } + + if (!Config->Emulation.empty()) + error(toString(F) + " is incompatible with " + Config->Emulation); + else + error(toString(F) + " is incompatible with " + toString(Config->FirstElf)); + return false; +} + +// Add symbols in File to the symbol table. 
template <class ELFT> void SymbolTable<ELFT>::addFile(InputFile *File) {
  // isCompatible() has already reported the error if it returns false.
  if (!isCompatible<ELFT>(File))
    return;

  // Binary file
  if (auto *F = dyn_cast<BinaryFile>(File)) {
    BinaryFiles.push_back(F);
    F->parse<ELFT>();
    return;
  }

  // .a file
  if (auto *F = dyn_cast<ArchiveFile>(File)) {
    F->parse<ELFT>();
    return;
  }

  // Lazy object file
  if (auto *F = dyn_cast<LazyObjectFile>(File)) {
    F->parse<ELFT>();
    return;
  }

  // Tracing output is only produced for the file kinds handled below.
  if (Config->Trace)
    outs() << toString(File) << "\n";

  // .so file
  if (auto *F = dyn_cast<SharedFile<ELFT>>(File)) {
    // DSOs are uniquified not by filename but by soname.
    F->parseSoName();
    if (ErrorCount || !SoNames.insert(F->getSoName()).second)
      return;
    SharedFiles.push_back(F);
    F->parseRest();
    return;
  }

  // LLVM bitcode file
  if (auto *F = dyn_cast<BitcodeFile>(File)) {
    BitcodeFiles.push_back(F);
    F->parse<ELFT>(ComdatGroups);
    return;
  }

  // Regular object file
  auto *F = cast<ObjectFile<ELFT>>(File);
  ObjectFiles.push_back(F);
  F->parse(ComdatGroups);
}

// This function is where all the optimizations of link-time
// optimization happen. When LTO is in use, some input files are
// not in native object file format but in the LLVM bitcode format.
// This function compiles bitcode files into a few big native files
// using LLVM functions and replaces bitcode symbols with the results.
// Because all bitcode files that consist of a program are passed
// to the compiler at once, it can do whole-program optimization.
template <class ELFT> void SymbolTable<ELFT>::addCombinedLTOObject() {
  if (BitcodeFiles.empty())
    return;

  // Compile bitcode files and replace bitcode symbols.
  LTO.reset(new BitcodeCompiler);
  for (BitcodeFile *F : BitcodeFiles)
    LTO->add<ELFT>(*F);

  for (InputFile *File : LTO->compile()) {
    ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(File);
    // Each compiled object is parsed against a fresh, empty group set rather
    // than the table's ComdatGroups.
    DenseSet<CachedHashStringRef> DummyGroups;
    Obj->parse(DummyGroups);
    ObjectFiles.push_back(Obj);
  }
}

// Adds Name as a regular defined symbol with type STT_NOTYPE, value 0, size 0
// and no section or file, and returns its body.
template <class ELFT>
DefinedRegular<ELFT> *SymbolTable<ELFT>::addAbsolute(StringRef Name,
                                                     uint8_t Visibility,
                                                     uint8_t Binding) {
  Symbol *Sym =
      addRegular(Name, Visibility, STT_NOTYPE, 0, 0, Binding, nullptr, nullptr);
  return cast<DefinedRegular<ELFT>>(Sym->body());
}

// Add Name as an "ignored" symbol. An ignored symbol is a regular
// linker-synthesized defined symbol, but is only defined if needed.
// Returns nullptr when Name is unknown or is not currently undefined.
template <class ELFT>
DefinedRegular<ELFT> *SymbolTable<ELFT>::addIgnored(StringRef Name,
                                                    uint8_t Visibility) {
  SymbolBody *S = find(Name);
  if (!S || !S->isUndefined())
    return nullptr;
  return addAbsolute(Name, Visibility);
}

// Set a flag for --trace-symbol so that we can print out a log message
// if a new symbol with the same name is inserted into the symbol table.
template <class ELFT> void SymbolTable<ELFT>::trace(StringRef Name) {
  // Index -1 is a placeholder that insert() recognizes and replaces with a
  // real symbol the first time the name is inserted.
  Symtab.insert({CachedHashStringRef(Name), {-1, true}});
}

// Rename SYM as __wrap_SYM. The original symbol is preserved as __real_SYM.
// Used to implement --wrap.
template <class ELFT> void SymbolTable<ELFT>::wrap(StringRef Name) {
  SymbolBody *B = find(Name);
  if (!B)
    return;
  Symbol *Sym = B->symbol();
  Symbol *Real = addUndefined(Saver.save("__real_" + Name));
  Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name));

  // We rename symbols by replacing the old symbol's SymbolBody with the new
  // symbol's SymbolBody. This causes all SymbolBody pointers referring to the
  // old symbol to instead refer to the new symbol.
  memcpy(Real->Body.buffer, Sym->Body.buffer, sizeof(Sym->Body));
  memcpy(Sym->Body.buffer, Wrap->Body.buffer, sizeof(Wrap->Body));
}

// Combines two visibility values: STV_DEFAULT yields to the other value;
// otherwise the numerically smaller of the two is returned.
static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
  if (VA == STV_DEFAULT)
    return VB;
  if (VB == STV_DEFAULT)
    return VA;
  return std::min(VA, VB);
}

// Find an existing symbol or create and insert a new one.
template <class ELFT>
std::pair<Symbol *, bool> SymbolTable<ELFT>::insert(StringRef Name) {
  auto P = Symtab.insert(
      {CachedHashStringRef(Name), SymIndex((int)SymVector.size(), false)});
  SymIndex &V = P.first->second;
  bool IsNew = P.second;

  // An index of -1 is the placeholder left by trace(): treat the symbol as
  // new and remember that it is traced.
  if (V.Idx == -1) {
    IsNew = true;
    V = SymIndex((int)SymVector.size(), true);
  }

  Symbol *Sym;
  if (IsNew) {
    // Fresh symbols start out weak, default-visibility and unused; callers
    // adjust these fields afterwards.
    Sym = new (BAlloc) Symbol;
    Sym->InVersionScript = false;
    Sym->Binding = STB_WEAK;
    Sym->Visibility = STV_DEFAULT;
    Sym->IsUsedInRegularObj = false;
    Sym->ExportDynamic = false;
    Sym->Traced = V.Traced;
    Sym->VersionId = Config->DefaultSymbolVersion;
    SymVector.push_back(Sym);
  } else {
    Sym = SymVector[V.Idx];
  }
  return {Sym, IsNew};
}

// Construct a string in the form of "Sym in File1 and File2".
// Used to construct an error message.
static std::string conflictMsg(SymbolBody *Existing, InputFile *NewFile) {
  return "'" + toString(*Existing) + "' in " + toString(Existing->File) +
         " and " + toString(NewFile);
}

// Find an existing symbol or create and insert a new one, then apply the given
// attributes.
template <class ELFT>
std::pair<Symbol *, bool>
SymbolTable<ELFT>::insert(StringRef Name, uint8_t Type, uint8_t Visibility,
                          bool CanOmitFromDynSym, InputFile *File) {
  // A symbol with no file, or one coming from a regular object file, counts
  // as used in a regular object.
  bool IsUsedInRegularObj = !File || File->kind() == InputFile::ObjectKind;
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name);

  // Merge in the new symbol's visibility.
  S->Visibility = getMinVisibility(S->Visibility, Visibility);
  if (!CanOmitFromDynSym && (Config->Shared || Config->ExportDynamic))
    S->ExportDynamic = true;
  if (IsUsedInRegularObj)
    S->IsUsedInRegularObj = true;
  // For an already-known symbol of known type, the new and the existing
  // definition must agree on whether the symbol is TLS.
  if (!WasInserted && S->body()->Type != SymbolBody::UnknownType &&
      ((Type == STT_TLS) != S->body()->isTls()))
    error("TLS attribute mismatch for symbol " + conflictMsg(S->body(), File));

  return {S, WasInserted};
}

// Convenience overload: a global, default-visibility, untyped undefined symbol
// that does not belong to any file.
template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) {
  return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT,
                      /*Type*/ 0,
                      /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
}

// The visibility is the low two bits of st_other.
static uint8_t getVisibility(uint8_t StOther) { return StOther & 3; }

template <class ELFT>
Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, bool IsLocal,
                                        uint8_t Binding, uint8_t StOther,
                                        uint8_t Type, bool CanOmitFromDynSym,
                                        InputFile *File) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) =
      insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, File);
  if (WasInserted) {
    S->Binding = Binding;
    replaceBody<Undefined<ELFT>>(S, Name, IsLocal, StOther, Type, File);
    return S;
  }
  // A non-weak reference to a shared or lazy symbol takes over the binding,
  // and marks the defining shared file as used.
  if (Binding != STB_WEAK) {
    if (S->body()->isShared() || S->body()->isLazy())
      S->Binding = Binding;
    if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(S->body()))
      SS->file()->IsUsed = true;
  }
  if (auto *L = dyn_cast<Lazy>(S->body())) {
    // An undefined weak will not fetch archive members, but we have to remember
    // its type. See also comment in addLazyArchive.
    if (S->isWeak())
      L->Type = Type;
    else if (InputFile *F = L->fetch())
      addFile(F);
  }
  return S;
}

// We have a new defined symbol with the specified binding. Return 1 if the new
// symbol should win, -1 if the new symbol should lose, or 0 if both symbols are
// strong defined symbols.
static int compareDefined(Symbol *S, bool WasInserted, uint8_t Binding) {
  if (WasInserted)
    return 1;
  SymbolBody *Body = S->body();
  // Lazy, undefined and shared bodies always give way to a new definition.
  if (Body->isLazy() || Body->isUndefined() || Body->isShared())
    return 1;
  if (Binding == STB_WEAK)
    return -1;
  if (S->isWeak())
    return 1;
  return 0;
}

// We have a new non-common defined symbol with the specified binding. Return 1
// if the new symbol should win, -1 if the new symbol should lose, or 0 if there
// is a conflict. If the new symbol wins, also update the binding.
template <typename ELFT>
static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding,
                                   bool IsAbsolute, typename ELFT::uint Value) {
  if (int Cmp = compareDefined(S, WasInserted, Binding)) {
    if (Cmp > 0)
      S->Binding = Binding;
    return Cmp;
  }
  SymbolBody *B = S->body();
  if (isa<DefinedCommon>(B)) {
    // Non-common symbols take precedence over common symbols.
    if (Config->WarnCommon)
      warn("common " + S->body()->getName() + " is overridden");
    return 1;
  } else if (auto *R = dyn_cast<DefinedRegular<ELFT>>(B)) {
    // A second global absolute definition with the same value as an existing
    // section-less definition is not a conflict; the existing one is kept.
    if (R->Section == nullptr && Binding == STB_GLOBAL && IsAbsolute &&
        R->Value == Value)
      return -1;
  }
  return 0;
}

// Adds a common symbol. When the existing symbol is also common, the larger
// alignment is recorded and the body is replaced only if the new size is
// larger.
template <class ELFT>
Symbol *SymbolTable<ELFT>::addCommon(StringRef N, uint64_t Size,
                                     uint64_t Alignment, uint8_t Binding,
                                     uint8_t StOther, uint8_t Type,
                                     InputFile *File) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(N, Type, getVisibility(StOther),
                                    /*CanOmitFromDynSym*/ false, File);
  int Cmp = compareDefined(S, WasInserted, Binding);
  if (Cmp > 0) {
    S->Binding = Binding;
    replaceBody<DefinedCommon>(S, N, Size, Alignment, StOther, Type, File);
  } else if (Cmp == 0) {
    auto *C = dyn_cast<DefinedCommon>(S->body());
    if (!C) {
      // Non-common symbols take precedence over common symbols.
      if (Config->WarnCommon)
        warn("common " + S->body()->getName() + " is overridden");
      return S;
    }

    if (Config->WarnCommon)
      warn("multiple common of " + S->body()->getName());

    Alignment = C->Alignment = std::max(C->Alignment, Alignment);
    if (Size > C->Size)
      replaceBody<DefinedCommon>(S, N, Size, Alignment, StOther, Type, File);
  }
  return S;
}

// Reports Msg as a warning when Config->AllowMultipleDefinition is set, and as
// an error otherwise.
static void print(const Twine &Msg) {
  if (Config->AllowMultipleDefinition)
    warn(Msg);
  else
    error(Msg);
}

// Reports a duplicate definition, naming the two files involved.
static void reportDuplicate(SymbolBody *Existing, InputFile *NewFile) {
  print("duplicate symbol " + conflictMsg(Existing, NewFile));
}

// Reports a duplicate definition with the locations of both definitions. Falls
// back to the file-based message above when the existing symbol is not a
// DefinedRegular with a section, or when ErrSec is null.
template <class ELFT>
static void reportDuplicate(SymbolBody *Existing,
                            InputSectionBase<ELFT> *ErrSec,
                            typename ELFT::uint ErrOffset) {
  DefinedRegular<ELFT> *D = dyn_cast<DefinedRegular<ELFT>>(Existing);
  if (!D || !D->Section || !ErrSec) {
    reportDuplicate(Existing, ErrSec ? ErrSec->getFile() : nullptr);
    return;
  }

  std::string OldLoc = D->Section->getLocation(D->Value);
  std::string NewLoc = ErrSec->getLocation(ErrOffset);

  print(NewLoc + ": duplicate symbol '" + toString(*Existing) + "'");
  print(OldLoc + ": previous definition was here");
}

// Adds a regular defined symbol. A null Section is passed on to
// compareDefinedNonCommon() as "the symbol is absolute".
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t StOther,
                                      uint8_t Type, uintX_t Value, uintX_t Size,
                                      uint8_t Binding,
                                      InputSectionBase<ELFT> *Section,
                                      InputFile *File) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name, Type, getVisibility(StOther),
                                    /*CanOmitFromDynSym*/ false, File);
  int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, Binding,
                                          Section == nullptr, Value);
  if (Cmp > 0)
    replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
                                      Value, Size, Section, File);
  else if (Cmp == 0)
    reportDuplicate(S->body(), Section, Value);
  return S;
}

// Adds a symbol defined relative to an output section. It is inserted as an
// STT_NOTYPE symbol with no file and compared as a global definition.
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addSynthetic(StringRef N,
                                        const OutputSectionBase *Section,
                                        uintX_t Value, uint8_t StOther) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(N, STT_NOTYPE, getVisibility(StOther),
                                    /*CanOmitFromDynSym*/ false, nullptr);
  int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, STB_GLOBAL,
                                          /*IsAbsolute*/ false, /*Value*/ 0);
  if (Cmp > 0)
    replaceBody<DefinedSynthetic>(S, N, Value, Section);
  else if (Cmp == 0)
    reportDuplicate(S->body(), nullptr);
  return S;
}

template <typename ELFT>
void SymbolTable<ELFT>::addShared(SharedFile<ELFT> *F, StringRef Name,
                                  const Elf_Sym &Sym,
                                  const typename ELFT::Verdef *Verdef) {
  // DSO symbols do not affect visibility in the output, so we pass STV_DEFAULT
  // as the visibility, which will leave the visibility in the symbol table
  // unchanged.
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) =
      insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true, F);
  // Make sure we preempt DSO symbols with default visibility.
  if (Sym.getVisibility() == STV_DEFAULT) {
    S->ExportDynamic = true;
    // Exporting preempting symbols takes precedence over linker scripts.
+ if (S->VersionId == VER_NDX_LOCAL) + S->VersionId = VER_NDX_GLOBAL; + } + if (WasInserted || isa<Undefined<ELFT>>(S->body())) { + replaceBody<SharedSymbol<ELFT>>(S, F, Name, Sym, Verdef); + if (!S->isWeak()) + F->IsUsed = true; + } +} + +template <class ELFT> +Symbol *SymbolTable<ELFT>::addBitcode(StringRef Name, uint8_t Binding, + uint8_t StOther, uint8_t Type, + bool CanOmitFromDynSym, BitcodeFile *F) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = + insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, F); + int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, Binding, + /*IsAbs*/ false, /*Value*/ 0); + if (Cmp > 0) + replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type, + 0, 0, nullptr, F); + else if (Cmp == 0) + reportDuplicate(S->body(), F); + return S; +} + +template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) { + auto It = Symtab.find(CachedHashStringRef(Name)); + if (It == Symtab.end()) + return nullptr; + SymIndex V = It->second; + if (V.Idx == -1) + return nullptr; + return SymVector[V.Idx]->body(); +} + +template <class ELFT> +void SymbolTable<ELFT>::addLazyArchive(ArchiveFile *F, + const object::Archive::Symbol Sym) { + Symbol *S; + bool WasInserted; + StringRef Name = Sym.getName(); + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { + replaceBody<LazyArchive>(S, *F, Sym, SymbolBody::UnknownType); + return; + } + if (!S->body()->isUndefined()) + return; + + // Weak undefined symbols should not fetch members from archives. If we were + // to keep old symbol we would not know that an archive member was available + // if a strong undefined symbol shows up afterwards in the link. If a strong + // undefined symbol never shows up, this lazy symbol will get to the end of + // the link and must be treated as the weak undefined one. We already marked + // this symbol as used when we added it to the symbol table, but we also need + // to preserve its type. 
FIXME: Move the Type field to Symbol. + if (S->isWeak()) { + replaceBody<LazyArchive>(S, *F, Sym, S->body()->Type); + return; + } + std::pair<MemoryBufferRef, uint64_t> MBInfo = F->getMember(&Sym); + if (!MBInfo.first.getBuffer().empty()) + addFile(createObjectFile(MBInfo.first, F->getName(), MBInfo.second)); +} + +template <class ELFT> +void SymbolTable<ELFT>::addLazyObject(StringRef Name, LazyObjectFile &Obj) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { + replaceBody<LazyObject>(S, Name, Obj, SymbolBody::UnknownType); + return; + } + if (!S->body()->isUndefined()) + return; + + // See comment for addLazyArchive above. + if (S->isWeak()) { + replaceBody<LazyObject>(S, Name, Obj, S->body()->Type); + } else { + MemoryBufferRef MBRef = Obj.getBuffer(); + if (!MBRef.getBuffer().empty()) + addFile(createObjectFile(MBRef)); + } +} + +// Process undefined (-u) flags by loading lazy symbols named by those flags. +template <class ELFT> void SymbolTable<ELFT>::scanUndefinedFlags() { + for (StringRef S : Config->Undefined) + if (auto *L = dyn_cast_or_null<Lazy>(find(S))) + if (InputFile *File = L->fetch()) + addFile(File); +} + +// This function takes care of the case in which shared libraries depend on +// the user program (not the other way, which is usual). Shared libraries +// may have undefined symbols, expecting that the user program provides +// the definitions for them. An example is BSD's __progname symbol. +// We need to put such symbols to the main program's .dynsym so that +// shared libraries can find them. +// Except this, we ignore undefined symbols in DSOs. +template <class ELFT> void SymbolTable<ELFT>::scanShlibUndefined() { + for (SharedFile<ELFT> *File : SharedFiles) + for (StringRef U : File->getUndefinedSymbols()) + if (SymbolBody *Sym = find(U)) + if (Sym->isDefined()) + Sym->symbol()->ExportDynamic = true; +} + +// Initialize DemangledSyms with a map from demangled symbols to symbol +// objects. 
Used to handle "extern C++" directive in version scripts. +// +// The map will contain all demangled symbols. That can be very large, +// and in LLD we generally want to avoid do anything for each symbol. +// Then, why are we doing this? Here's why. +// +// Users can use "extern C++ {}" directive to match against demangled +// C++ symbols. For example, you can write a pattern such as +// "llvm::*::foo(int, ?)". Obviously, there's no way to handle this +// other than trying to match a pattern against all demangled symbols. +// So, if "extern C++" feature is used, we need to demangle all known +// symbols. +template <class ELFT> +StringMap<std::vector<SymbolBody *>> &SymbolTable<ELFT>::getDemangledSyms() { + if (!DemangledSyms) { + DemangledSyms.emplace(); + for (Symbol *Sym : SymVector) { + SymbolBody *B = Sym->body(); + if (B->isUndefined()) + continue; + if (Optional<std::string> S = demangle(B->getName())) + (*DemangledSyms)[*S].push_back(B); + else + (*DemangledSyms)[B->getName()].push_back(B); + } + } + return *DemangledSyms; +} + +template <class ELFT> +std::vector<SymbolBody *> SymbolTable<ELFT>::findByVersion(SymbolVersion Ver) { + if (Ver.IsExternCpp) + return getDemangledSyms().lookup(Ver.Name); + if (SymbolBody *B = find(Ver.Name)) + if (!B->isUndefined()) + return {B}; + return {}; +} + +template <class ELFT> +std::vector<SymbolBody *> +SymbolTable<ELFT>::findAllByVersion(SymbolVersion Ver) { + std::vector<SymbolBody *> Res; + StringMatcher M(Ver.Name); + + if (Ver.IsExternCpp) { + for (auto &P : getDemangledSyms()) + if (M.match(P.first())) + Res.insert(Res.end(), P.second.begin(), P.second.end()); + return Res; + } + + for (Symbol *Sym : SymVector) { + SymbolBody *B = Sym->body(); + if (!B->isUndefined() && M.match(B->getName())) + Res.push_back(B); + } + return Res; +} + +// If there's only one anonymous version definition in a version +// script file, the script does not actually define any symbol version, +// but just specifies symbols visibilities. 
We assume that the script was +// in the form of { global: foo; bar; local *; }. So, local is default. +// In this function, we make specified symbols global. +template <class ELFT> void SymbolTable<ELFT>::handleAnonymousVersion() { + for (SymbolVersion &Ver : Config->VersionScriptGlobals) { + if (Ver.HasWildcard) { + for (SymbolBody *B : findAllByVersion(Ver)) + B->symbol()->VersionId = VER_NDX_GLOBAL; + continue; + } + for (SymbolBody *B : findByVersion(Ver)) + B->symbol()->VersionId = VER_NDX_GLOBAL; + } +} + +// Set symbol versions to symbols. This function handles patterns +// containing no wildcard characters. +template <class ELFT> +void SymbolTable<ELFT>::assignExactVersion(SymbolVersion Ver, uint16_t VersionId, + StringRef VersionName) { + if (Ver.HasWildcard) + return; + + // Get a list of symbols which we need to assign the version to. + std::vector<SymbolBody *> Syms = findByVersion(Ver); + if (Syms.empty()) { + if (Config->NoUndefinedVersion) + error("version script assignment of '" + VersionName + "' to symbol '" + + Ver.Name + "' failed: symbol not defined"); + return; + } + + // Assign the version. + for (SymbolBody *B : Syms) { + Symbol *Sym = B->symbol(); + if (Sym->InVersionScript) + warn("duplicate symbol '" + Ver.Name + "' in version script"); + Sym->VersionId = VersionId; + Sym->InVersionScript = true; + } +} + +template <class ELFT> +void SymbolTable<ELFT>::assignWildcardVersion(SymbolVersion Ver, + uint16_t VersionId) { + if (!Ver.HasWildcard) + return; + std::vector<SymbolBody *> Syms = findAllByVersion(Ver); + + // Exact matching takes precendence over fuzzy matching, + // so we set a version to a symbol only if no version has been assigned + // to the symbol. This behavior is compatible with GNU. + for (SymbolBody *B : Syms) + if (B->symbol()->VersionId == Config->DefaultSymbolVersion) + B->symbol()->VersionId = VersionId; +} + +// This function processes version scripts by updating VersionId +// member of symbols. 
+template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() { + // Symbol themselves might know their versions because symbols + // can contain versions in the form of <name>@<version>. + // Let them parse their names. + if (!Config->VersionDefinitions.empty()) + for (Symbol *Sym : SymVector) + Sym->body()->parseSymbolVersion(); + + // Handle edge cases first. + if (!Config->VersionScriptGlobals.empty()) { + handleAnonymousVersion(); + return; + } + + if (Config->VersionDefinitions.empty()) + return; + + // Now we have version definitions, so we need to set version ids to symbols. + // Each version definition has a glob pattern, and all symbols that match + // with the pattern get that version. + + // First, we assign versions to exact matching symbols, + // i.e. version definitions not containing any glob meta-characters. + for (SymbolVersion &Ver : Config->VersionScriptLocals) + assignExactVersion(Ver, VER_NDX_LOCAL, "local"); + for (VersionDefinition &V : Config->VersionDefinitions) + for (SymbolVersion &Ver : V.Globals) + assignExactVersion(Ver, V.Id, V.Name); + + // Next, we assign versions to fuzzy matching symbols, + // i.e. version definitions containing glob meta-characters. + // Note that because the last match takes precedence over previous matches, + // we iterate over the definitions in the reverse order. 
+ for (SymbolVersion &Ver : Config->VersionScriptLocals) + assignWildcardVersion(Ver, VER_NDX_LOCAL); + for (VersionDefinition &V : llvm::reverse(Config->VersionDefinitions)) + for (SymbolVersion &Ver : V.Globals) + assignWildcardVersion(Ver, V.Id); +} + +template class elf::SymbolTable<ELF32LE>; +template class elf::SymbolTable<ELF32BE>; +template class elf::SymbolTable<ELF64LE>; +template class elf::SymbolTable<ELF64BE>; diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.h b/contrib/llvm/tools/lld/ELF/SymbolTable.h new file mode 100644 index 000000000000..1e5a335acc16 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/SymbolTable.h @@ -0,0 +1,151 @@ +//===- SymbolTable.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SYMBOL_TABLE_H +#define LLD_ELF_SYMBOL_TABLE_H + +#include "InputFiles.h" +#include "LTO.h" +#include "Strings.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseMap.h" + +namespace lld { +namespace elf { +class Lazy; +class OutputSectionBase; +struct Symbol; + +// SymbolTable is a bucket of all known symbols, including defined, +// undefined, or lazy symbols (the last one is symbols in archive +// files whose archive members are not yet loaded). +// +// We put all symbols of all files to a SymbolTable, and the +// SymbolTable selects the "best" symbols if there are name +// conflicts. For example, obviously, a defined symbol is better than +// an undefined symbol. Or, if there's a conflict between a lazy and a +// undefined, it'll read an archive member to read a real definition +// to replace the lazy symbol. The logic is implemented in the +// add*() functions, which are called by input files as they are parsed. There +// is one add* function per symbol type. 
// Holds every symbol known to the link together with the input files they
// came from. The add*() members insert a symbol of one particular kind and
// resolve it against whatever is already stored under the same name.
template <class ELFT> class SymbolTable {
  typedef typename ELFT::Sym Elf_Sym;
  typedef typename ELFT::uint uintX_t;

public:
  void addFile(InputFile *File);
  void addCombinedLTOObject();

  // Read-only views of the symbols and input files collected so far.
  ArrayRef<Symbol *> getSymbols() const { return SymVector; }
  ArrayRef<ObjectFile<ELFT> *> getObjectFiles() const { return ObjectFiles; }
  ArrayRef<BinaryFile *> getBinaryFiles() const { return BinaryFiles; }
  ArrayRef<SharedFile<ELFT> *> getSharedFiles() const { return SharedFiles; }

  DefinedRegular<ELFT> *addAbsolute(StringRef Name,
                                    uint8_t Visibility = llvm::ELF::STV_HIDDEN,
                                    uint8_t Binding = llvm::ELF::STB_GLOBAL);
  DefinedRegular<ELFT> *addIgnored(StringRef Name,
                                   uint8_t Visibility = llvm::ELF::STV_HIDDEN);

  // The one-argument form adds a global, default-visibility reference that
  // belongs to no file.
  Symbol *addUndefined(StringRef Name);
  Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding,
                       uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym,
                       InputFile *File);

  // A null Section makes the definition an absolute symbol.
  Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type,
                     uintX_t Value, uintX_t Size, uint8_t Binding,
                     InputSectionBase<ELFT> *Section, InputFile *File);

  Symbol *addSynthetic(StringRef N, const OutputSectionBase *Section,
                       uintX_t Value, uint8_t StOther);

  void addShared(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
                 const typename ELFT::Verdef *Verdef);

  void addLazyArchive(ArchiveFile *F, const llvm::object::Archive::Symbol S);
  void addLazyObject(StringRef Name, LazyObjectFile &Obj);
  Symbol *addBitcode(StringRef Name, uint8_t Binding, uint8_t StOther,
                     uint8_t Type, bool CanOmitFromDynSym, BitcodeFile *File);

  Symbol *addCommon(StringRef N, uint64_t Size, uint64_t Alignment,
                    uint8_t Binding, uint8_t StOther, uint8_t Type,
                    InputFile *File);

  // Post-processing passes over the collected symbols: load lazy symbols
  // named by -u, export symbols that shared libraries leave undefined, and
  // apply the version script.
  void scanUndefinedFlags();
  void scanShlibUndefined();
  void scanVersionScript();

  // Returns the body stored under Name, or nullptr if there is none.
  SymbolBody *find(StringRef Name);

  void trace(StringRef Name);
  void wrap(StringRef Name);

  std::vector<InputSectionBase<ELFT> *> Sections;

private:
  // Both forms return the entry for Name and whether it was newly created.
  std::pair<Symbol *, bool> insert(StringRef Name);
  std::pair<Symbol *, bool> insert(StringRef Name, uint8_t Type,
                                   uint8_t Visibility, bool CanOmitFromDynSym,
                                   InputFile *File);

  // Exact-name and pattern lookup for version script entries; neither
  // returns undefined symbols.
  std::vector<SymbolBody *> findByVersion(SymbolVersion Ver);
  std::vector<SymbolBody *> findAllByVersion(SymbolVersion Ver);

  llvm::StringMap<std::vector<SymbolBody *>> &getDemangledSyms();
  void handleAnonymousVersion();
  void assignExactVersion(SymbolVersion Ver, uint16_t VersionId,
                          StringRef VersionName);
  void assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId);

  // Value type of Symtab: an index into SymVector plus a trace flag, packed
  // into 32 bits. find() treats an index of -1 as "no symbol".
  struct SymIndex {
    SymIndex(int Idx, bool Traced) : Idx(Idx), Traced(Traced) {}
    int Idx : 31;
    unsigned Traced : 1;
  };

  // The order the global symbols are in is not defined. We can use an arbitrary
  // order, but it has to be reproducible. That is true even when cross linking.
  // The default hashing of StringRef produces different results on 32 and 64
  // bit systems so we use a map to a vector. That is arbitrary, deterministic
  // but a bit inefficient.
  // FIXME: Experiment with passing in a custom hashing or sorting the symbols
  // once symbol resolution is finished.
  llvm::DenseMap<llvm::CachedHashStringRef, SymIndex> Symtab;
  std::vector<Symbol *> SymVector;

  // Comdat groups define "link once" sections. If two comdat groups have the
  // same name, only one of them is linked, and the other is ignored. This set
  // is used to uniquify them.
  llvm::DenseSet<llvm::CachedHashStringRef> ComdatGroups;

  std::vector<ObjectFile<ELFT> *> ObjectFiles;
  std::vector<SharedFile<ELFT> *> SharedFiles;
  std::vector<BitcodeFile *> BitcodeFiles;
  std::vector<BinaryFile *> BinaryFiles;

  // Set of .so files to not link the same shared object file more than once.
  llvm::DenseSet<StringRef> SoNames;

  // A map from demangled symbol names to their symbol objects.
  // This mapping is 1:N because two symbols with different versions
  // can have the same name. We use this map to handle "extern C++ {}"
  // directive in version scripts. It stays empty until getDemangledSyms()
  // is first called.
  llvm::Optional<llvm::StringMap<std::vector<SymbolBody *>>> DemangledSyms;

  // For LTO.
  std::unique_ptr<BitcodeCompiler> LTO;
};
// Constructs a symbol body of kind K with the given name, locality, st_other
// value and type. All bookkeeping flags (copy/PLT address, MIPS GOT, Iplt,
// Igot) start out cleared.
SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
                       uint8_t Type)
    : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal),
      IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false),
      IsInIgot(false), Type(Type), StOther(StOther), Name(Name) {}
The exceptions are + // symbols with copy relocations (which resolve to .bss) or preempt plt + // entries (which resolve to that plt entry). + if (isShared()) + return !NeedsCopyOrPltAddr; + + // That's all that can be preempted in a non-DSO. + if (!Config->Shared) + return false; + + // Only symbols that appear in dynsym can be preempted. + if (!symbol()->includeInDynsym()) + return false; + + // Only default visibility symbols can be preempted. + if (symbol()->Visibility != STV_DEFAULT) + return false; + + // -Bsymbolic means that definitions are not preempted. + if (Config->Bsymbolic || (Config->BsymbolicFunctions && isFunc())) + return !isDefined(); + return true; +} + +template <class ELFT> bool SymbolBody::hasThunk() const { + if (auto *DR = dyn_cast<DefinedRegular<ELFT>>(this)) + return DR->ThunkData != nullptr; + if (auto *S = dyn_cast<SharedSymbol<ELFT>>(this)) + return S->ThunkData != nullptr; + return false; +} + +template <class ELFT> +typename ELFT::uint SymbolBody::getVA(typename ELFT::uint Addend) const { + typename ELFT::uint OutVA = getSymVA<ELFT>(*this, Addend); + return OutVA + Addend; +} + +template <class ELFT> typename ELFT::uint SymbolBody::getGotVA() const { + return In<ELFT>::Got->getVA() + getGotOffset<ELFT>(); +} + +template <class ELFT> typename ELFT::uint SymbolBody::getGotOffset() const { + return GotIndex * Target->GotEntrySize; +} + +template <class ELFT> typename ELFT::uint SymbolBody::getGotPltVA() const { + if (this->IsInIgot) + return In<ELFT>::IgotPlt->getVA() + getGotPltOffset<ELFT>(); + return In<ELFT>::GotPlt->getVA() + getGotPltOffset<ELFT>(); +} + +template <class ELFT> typename ELFT::uint SymbolBody::getGotPltOffset() const { + return GotPltIndex * Target->GotPltEntrySize; +} + +template <class ELFT> typename ELFT::uint SymbolBody::getPltVA() const { + if (this->IsInIplt) + return In<ELFT>::Iplt->getVA() + PltIndex * Target->PltEntrySize; + return In<ELFT>::Plt->getVA() + Target->PltHeaderSize + + PltIndex * 
Target->PltEntrySize; +} + +template <class ELFT> typename ELFT::uint SymbolBody::getThunkVA() const { + if (const auto *DR = dyn_cast<DefinedRegular<ELFT>>(this)) + return DR->ThunkData->getVA(); + if (const auto *S = dyn_cast<SharedSymbol<ELFT>>(this)) + return S->ThunkData->getVA(); + if (const auto *S = dyn_cast<Undefined<ELFT>>(this)) + return S->ThunkData->getVA(); + fatal("getThunkVA() not supported for Symbol class\n"); +} + +template <class ELFT> typename ELFT::uint SymbolBody::getSize() const { + if (const auto *C = dyn_cast<DefinedCommon>(this)) + return C->Size; + if (const auto *DR = dyn_cast<DefinedRegular<ELFT>>(this)) + return DR->Size; + if (const auto *S = dyn_cast<SharedSymbol<ELFT>>(this)) + return S->Sym.st_size; + return 0; +} + +// If a symbol name contains '@', the characters after that is +// a symbol version name. This function parses that. +void SymbolBody::parseSymbolVersion() { + StringRef S = getName(); + size_t Pos = S.find('@'); + if (Pos == 0 || Pos == StringRef::npos) + return; + StringRef Verstr = S.substr(Pos + 1); + if (Verstr.empty()) + return; + + // Truncate the symbol name so that it doesn't include the version string. + Name = {S.data(), Pos}; + + // If this is an undefined or shared symbol it is not a definition. + if (isUndefined() || isShared()) + return; + + // '@@' in a symbol name means the default version. + // It is usually the most recent one. + bool IsDefault = (Verstr[0] == '@'); + if (IsDefault) + Verstr = Verstr.substr(1); + + for (VersionDefinition &Ver : Config->VersionDefinitions) { + if (Ver.Name != Verstr) + continue; + + if (IsDefault) + symbol()->VersionId = Ver.Id; + else + symbol()->VersionId = Ver.Id | VERSYM_HIDDEN; + return; + } + + // It is an error if the specified version is not defined. 
+ error(toString(File) + ": symbol " + S + " has undefined version " + Verstr); +} + +Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type) + : SymbolBody(K, Name, IsLocal, StOther, Type) {} + +template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const { + if (!Section || !isFunc()) + return false; + return (this->StOther & STO_MIPS_MIPS16) == STO_MIPS_PIC || + (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC); +} + +template <typename ELFT> +Undefined<ELFT>::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type, InputFile *File) + : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) { + this->File = File; +} + +DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment, + uint8_t StOther, uint8_t Type, InputFile *File) + : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther, + Type), + Alignment(Alignment), Size(Size) { + this->File = File; +} + +InputFile *Lazy::fetch() { + if (auto *S = dyn_cast<LazyArchive>(this)) + return S->fetch(); + return cast<LazyObject>(this)->fetch(); +} + +LazyArchive::LazyArchive(ArchiveFile &File, + const llvm::object::Archive::Symbol S, uint8_t Type) + : Lazy(LazyArchiveKind, S.getName(), Type), Sym(S) { + this->File = &File; +} + +LazyObject::LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type) + : Lazy(LazyObjectKind, Name, Type) { + this->File = &File; +} + +InputFile *LazyArchive::fetch() { + std::pair<MemoryBufferRef, uint64_t> MBInfo = file()->getMember(&Sym); + + // getMember returns an empty buffer if the member was already + // read from the library. 
+ if (MBInfo.first.getBuffer().empty()) + return nullptr; + return createObjectFile(MBInfo.first, file()->getName(), MBInfo.second); +} + +InputFile *LazyObject::fetch() { + MemoryBufferRef MBRef = file()->getBuffer(); + if (MBRef.getBuffer().empty()) + return nullptr; + return createObjectFile(MBRef); +} + +bool Symbol::includeInDynsym() const { + if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED) + return false; + return (ExportDynamic && VersionId != VER_NDX_LOCAL) || body()->isShared() || + (body()->isUndefined() && Config->Shared); +} + +// Print out a log message for --trace-symbol. +void elf::printTraceSymbol(Symbol *Sym) { + SymbolBody *B = Sym->body(); + outs() << toString(B->File); + + if (B->isUndefined()) + outs() << ": reference to "; + else if (B->isCommon()) + outs() << ": common definition of "; + else + outs() << ": definition of "; + outs() << B->getName() << "\n"; +} + +// Returns a symbol for an error message. +std::string lld::toString(const SymbolBody &B) { + if (Config->Demangle) + if (Optional<std::string> S = demangle(B.getName())) + return *S; + return B.getName(); +} + +template bool SymbolBody::hasThunk<ELF32LE>() const; +template bool SymbolBody::hasThunk<ELF32BE>() const; +template bool SymbolBody::hasThunk<ELF64LE>() const; +template bool SymbolBody::hasThunk<ELF64BE>() const; + +template uint32_t SymbolBody::template getVA<ELF32LE>(uint32_t) const; +template uint32_t SymbolBody::template getVA<ELF32BE>(uint32_t) const; +template uint64_t SymbolBody::template getVA<ELF64LE>(uint64_t) const; +template uint64_t SymbolBody::template getVA<ELF64BE>(uint64_t) const; + +template uint32_t SymbolBody::template getGotVA<ELF32LE>() const; +template uint32_t SymbolBody::template getGotVA<ELF32BE>() const; +template uint64_t SymbolBody::template getGotVA<ELF64LE>() const; +template uint64_t SymbolBody::template getGotVA<ELF64BE>() const; + +template uint32_t SymbolBody::template getGotOffset<ELF32LE>() const; +template uint32_t 
SymbolBody::template getGotOffset<ELF32BE>() const; +template uint64_t SymbolBody::template getGotOffset<ELF64LE>() const; +template uint64_t SymbolBody::template getGotOffset<ELF64BE>() const; + +template uint32_t SymbolBody::template getGotPltVA<ELF32LE>() const; +template uint32_t SymbolBody::template getGotPltVA<ELF32BE>() const; +template uint64_t SymbolBody::template getGotPltVA<ELF64LE>() const; +template uint64_t SymbolBody::template getGotPltVA<ELF64BE>() const; + +template uint32_t SymbolBody::template getThunkVA<ELF32LE>() const; +template uint32_t SymbolBody::template getThunkVA<ELF32BE>() const; +template uint64_t SymbolBody::template getThunkVA<ELF64LE>() const; +template uint64_t SymbolBody::template getThunkVA<ELF64BE>() const; + +template uint32_t SymbolBody::template getGotPltOffset<ELF32LE>() const; +template uint32_t SymbolBody::template getGotPltOffset<ELF32BE>() const; +template uint64_t SymbolBody::template getGotPltOffset<ELF64LE>() const; +template uint64_t SymbolBody::template getGotPltOffset<ELF64BE>() const; + +template uint32_t SymbolBody::template getPltVA<ELF32LE>() const; +template uint32_t SymbolBody::template getPltVA<ELF32BE>() const; +template uint64_t SymbolBody::template getPltVA<ELF64LE>() const; +template uint64_t SymbolBody::template getPltVA<ELF64BE>() const; + +template uint32_t SymbolBody::template getSize<ELF32LE>() const; +template uint32_t SymbolBody::template getSize<ELF32BE>() const; +template uint64_t SymbolBody::template getSize<ELF64LE>() const; +template uint64_t SymbolBody::template getSize<ELF64BE>() const; + +template class elf::Undefined<ELF32LE>; +template class elf::Undefined<ELF32BE>; +template class elf::Undefined<ELF64LE>; +template class elf::Undefined<ELF64BE>; + +template class elf::DefinedRegular<ELF32LE>; +template class elf::DefinedRegular<ELF32BE>; +template class elf::DefinedRegular<ELF64LE>; +template class elf::DefinedRegular<ELF64BE>; diff --git a/contrib/llvm/tools/lld/ELF/Symbols.h 
b/contrib/llvm/tools/lld/ELF/Symbols.h new file mode 100644 index 000000000000..38889571679c --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Symbols.h @@ -0,0 +1,461 @@ +//===- Symbols.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// All symbols are handled as SymbolBodies regardless of their types. +// This file defines various types of SymbolBodies. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SYMBOLS_H +#define LLD_ELF_SYMBOLS_H + +#include "InputSection.h" +#include "Strings.h" + +#include "lld/Core/LLVM.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELF.h" + +namespace lld { +namespace elf { + +class ArchiveFile; +class BitcodeFile; +class InputFile; +class LazyObjectFile; +template <class ELFT> class ObjectFile; +template <class ELFT> class OutputSection; +class OutputSectionBase; +template <class ELFT> class SharedFile; + +struct Symbol; + +// The base class for real symbol classes. 
// Concrete kinds are distinguished by the Kind tag stored in SymbolKind
// rather than by virtual functions; the is*() predicates below and the
// classof() functions of the derived classes all test that tag.
class SymbolBody {
public:
  // The enumerator order matters: every "defined" kind must lie in the range
  // [DefinedFirst, DefinedLast] because isDefined() is a single comparison
  // against DefinedLast.
  enum Kind {
    DefinedFirst,
    DefinedRegularKind = DefinedFirst,
    SharedKind,
    DefinedCommonKind,
    DefinedSyntheticKind,
    DefinedLast = DefinedSyntheticKind,
    UndefinedKind,
    LazyArchiveKind,
    LazyObjectKind,
  };

  // Sets only the kind tag; the bit-fields, Type and StOther are not
  // initialized by this constructor.
  SymbolBody(Kind K) : SymbolKind(K) {}

  // Returns the Symbol this body is stored in. Defined inline at the end of
  // this header; it asserts that the body is not a local symbol.
  Symbol *symbol();
  const Symbol *symbol() const {
    return const_cast<SymbolBody *>(this)->symbol();
  }

  Kind kind() const { return static_cast<Kind>(SymbolKind); }

  bool isUndefined() const { return SymbolKind == UndefinedKind; }
  bool isDefined() const { return SymbolKind <= DefinedLast; }
  bool isCommon() const { return SymbolKind == DefinedCommonKind; }
  bool isLazy() const {
    return SymbolKind == LazyArchiveKind || SymbolKind == LazyObjectKind;
  }
  bool isShared() const { return SymbolKind == SharedKind; }
  bool isLocal() const { return IsLocal; }
  bool isPreemptible() const;
  StringRef getName() const { return Name; }
  // The visibility is the low two bits of st_other.
  uint8_t getVisibility() const { return StOther & 0x3; }
  void parseSymbolVersion();

  // An index of -1 (all bits set) means "no entry has been allocated".
  bool isInGot() const { return GotIndex != -1U; }
  bool isInPlt() const { return PltIndex != -1U; }
  template <class ELFT> bool hasThunk() const;

  template <class ELFT>
  typename ELFT::uint getVA(typename ELFT::uint Addend = 0) const;

  template <class ELFT> typename ELFT::uint getGotOffset() const;
  template <class ELFT> typename ELFT::uint getGotVA() const;
  template <class ELFT> typename ELFT::uint getGotPltOffset() const;
  template <class ELFT> typename ELFT::uint getGotPltVA() const;
  template <class ELFT> typename ELFT::uint getPltVA() const;
  template <class ELFT> typename ELFT::uint getThunkVA() const;
  template <class ELFT> typename ELFT::uint getSize() const;

  // The file from which this symbol was created.
  InputFile *File = nullptr;

  // Table indices assigned while synthetic sections are populated. Except for
  // DynsymIndex, they start out as -1, which the code treats as "unassigned".
  uint32_t DynsymIndex = 0;
  uint32_t GotIndex = -1;
  uint32_t GotPltIndex = -1;
  uint32_t PltIndex = -1;
  uint32_t GlobalDynIndex = -1;

protected:
  SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
             uint8_t Type);

  // Holds a Kind value; read it through kind().
  const unsigned SymbolKind : 8;

public:
  // True if the linker has to generate a copy relocation for this shared
  // symbol or if the symbol should point to its plt entry.
  unsigned NeedsCopyOrPltAddr : 1;

  // True if this is a local symbol.
  unsigned IsLocal : 1;

  // True if this symbol has an entry in the global part of MIPS GOT.
  unsigned IsInGlobalMipsGot : 1;

  // True if this symbol is referenced by 32-bit GOT relocations.
  unsigned Is32BitMipsGot : 1;

  // True if this symbol is in the Iplt sub-section of the Plt.
  unsigned IsInIplt : 1;

  // True if this symbol is in the Igot sub-section of the .got.plt or .got.
  unsigned IsInIgot : 1;

  // The following fields have the same meaning as the ELF symbol attributes.
  uint8_t Type;    // symbol type
  uint8_t StOther; // st_other field value

  // The Type field may also have this value. It means that we have not yet seen
  // a non-Lazy symbol with this name, so we don't know what its type is. The
  // Type field is normally set to this value for Lazy symbols unless we saw a
  // weak undefined symbol first, in which case we need to remember the original
  // symbol's type in order to check for TLS mismatches.
  enum { UnknownType = 255 };

  bool isSection() const { return Type == llvm::ELF::STT_SECTION; }
  bool isTls() const { return Type == llvm::ELF::STT_TLS; }
  bool isFunc() const { return Type == llvm::ELF::STT_FUNC; }
  bool isGnuIFunc() const { return Type == llvm::ELF::STT_GNU_IFUNC; }
  bool isObject() const { return Type == llvm::ELF::STT_OBJECT; }
  bool isFile() const { return Type == llvm::ELF::STT_FILE; }

protected:
  StringRefZ Name;
};

// The base class for any defined symbols.
class Defined : public SymbolBody {
public:
  Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type);
  // LLVM-style RTTI hook: any kind in the "defined" range is a Defined.
  static bool classof(const SymbolBody *S) { return S->isDefined(); }
};

// A common symbol. Space for it is allocated by the linker; see the Offset,
// Alignment and Size fields below.
class DefinedCommon : public Defined {
public:
  DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t StOther,
                uint8_t Type, InputFile *File);

  static bool classof(const SymbolBody *S) {
    return S->kind() == SymbolBody::DefinedCommonKind;
  }

  // The output offset of this common symbol in the output bss. Computed by the
  // writer.
  uint64_t Offset;

  // The maximum alignment we have seen for this symbol.
  uint64_t Alignment;

  uint64_t Size;
};

// Regular defined symbols read from object file symbol tables.
template <class ELFT> class DefinedRegular : public Defined {
  typedef typename ELFT::Sym Elf_Sym;
  typedef typename ELFT::uint uintX_t;

public:
  // If Section is non-null, the symbol binds to Section->Repl (by reference);
  // otherwise it binds to the shared NullInputSection slot.
  DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
                 uintX_t Value, uintX_t Size, InputSectionBase<ELFT> *Section,
                 InputFile *File)
      : Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type),
        Value(Value), Size(Size),
        Section(Section ? Section->Repl : NullInputSection) {
    this->File = File;
  }

  // Return true if the symbol is a PIC function.
  bool isMipsPIC() const;

  static bool classof(const SymbolBody *S) {
    return S->kind() == SymbolBody::DefinedRegularKind;
  }

  uintX_t Value;
  uintX_t Size;

  // The input section this symbol belongs to. Notice that this is
  // a reference to a pointer. We are using two levels of indirections
  // because of ICF. If ICF decides two sections need to be merged, it
  // manipulates this Section pointers so that they point to the same
  // section. This is a bit tricky, so be careful to not be confused.
  // If this is null, the symbol is an absolute symbol.
  InputSectionBase<ELFT> *&Section;

  // If non-null the symbol has a Thunk that may be used as an alternative
  // destination for callers of this Symbol.
  Thunk<ELFT> *ThunkData = nullptr;

private:
  // The pointer slot that Section refers to when the constructor is given a
  // null section. It is shared by all such symbols of this ELFT.
  static InputSectionBase<ELFT> *NullInputSection;
};

template <class ELFT>
InputSectionBase<ELFT> *DefinedRegular<ELFT>::NullInputSection;

// DefinedSynthetic is a class to represent linker-generated ELF symbols.
// The difference from the regular symbol is that DefinedSynthetic symbols
// don't belong to any input files or sections. Thus, its constructor
// takes an output section to calculate output VA, etc.
// If Section is null, this symbol is relative to the image base.
class DefinedSynthetic : public Defined {
public:
  // Synthetic symbols are always non-local, hidden, and have type 0.
  DefinedSynthetic(StringRef Name, uint64_t Value,
                   const OutputSectionBase *Section)
      : Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false,
                llvm::ELF::STV_HIDDEN, 0 /* Type */),
        Value(Value), Section(Section) {}

  static bool classof(const SymbolBody *S) {
    return S->kind() == SymbolBody::DefinedSyntheticKind;
  }

  uint64_t Value;
  const OutputSectionBase *Section;
};

// An undefined symbol.
template <class ELFT> class Undefined : public SymbolBody {
public:
  Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
            InputFile *F);

  static bool classof(const SymbolBody *S) {
    return S->kind() == UndefinedKind;
  }

  // If non-null the symbol has a Thunk that may be used as an alternative
  // destination for callers of this Symbol. When linking a DSO undefined
  // symbols are implicitly imported, the symbol lookup will be performed by
  // the dynamic loader. A call to an undefined symbol will be given a PLT
  // entry and on ARM this may need a Thunk if the caller is in Thumb state.
  Thunk<ELFT> *ThunkData = nullptr;
  InputFile *file() { return this->File; }
};

// A symbol defined by a shared object (DSO).
template <class ELFT> class SharedSymbol : public Defined {
  typedef typename ELFT::Sym Elf_Sym;
  typedef typename ELFT::Verdef Elf_Verdef;
  typedef typename ELFT::uint uintX_t;

public:
  static bool classof(const SymbolBody *S) {
    return S->kind() == SymbolBody::SharedKind;
  }

  // Sym is stored by reference, so the ELF symbol it refers to must outlive
  // this object.
  SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
               const Elf_Verdef *Verdef)
      : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other,
                Sym.getType()),
        Sym(Sym), Verdef(Verdef) {
    // IFuncs defined in DSOs are treated as functions by the static linker.
    if (isGnuIFunc())
      Type = llvm::ELF::STT_FUNC;
    this->File = F;
  }

  // File is always set from a SharedFile<ELFT> in the constructor above.
  SharedFile<ELFT> *file() { return (SharedFile<ELFT> *)this->File; }

  const Elf_Sym &Sym;

  // This field is a pointer to the symbol's version definition.
  const Elf_Verdef *Verdef;

  // OffsetInBss is significant only when needsCopy() is true.
  uintX_t OffsetInBss = 0;

  // If non-null the symbol has a Thunk that may be used as an alternative
  // destination for callers of this Symbol.
  Thunk<ELFT> *ThunkData = nullptr;
  // NeedsCopyOrPltAddr is shared between two uses; for a non-function symbol
  // it means a copy relocation is needed.
  bool needsCopy() const { return this->NeedsCopyOrPltAddr && !this->isFunc(); }
};

// This class represents a symbol defined in an archive file. It is
// created from an archive file header, and it knows how to load an
// object file from an archive to replace itself with a defined
// symbol. If the resolver finds both Undefined and Lazy for
// the same name, it will ask the Lazy to load a file.
class Lazy : public SymbolBody {
public:
  static bool classof(const SymbolBody *S) { return S->isLazy(); }

  // Returns an object file for this symbol, or a nullptr if the file
  // was already returned.
  InputFile *fetch();

protected:
  // Lazy symbols are always non-local with default visibility.
  Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type)
      : SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {}
};

// LazyArchive symbols represent symbols in archive files.
class LazyArchive : public Lazy {
public:
  LazyArchive(ArchiveFile &File, const llvm::object::Archive::Symbol S,
              uint8_t Type);

  static bool classof(const SymbolBody *S) {
    return S->kind() == LazyArchiveKind;
  }

  ArchiveFile *file() { return (ArchiveFile *)this->File; }
  InputFile *fetch();

private:
  const llvm::object::Archive::Symbol Sym;
};

// LazyObject symbols represent symbols in object files between
// --start-lib and --end-lib options.
class LazyObject : public Lazy {
public:
  LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type);

  static bool classof(const SymbolBody *S) {
    return S->kind() == LazyObjectKind;
  }

  LazyObjectFile *file() { return (LazyObjectFile *)this->File; }
  InputFile *fetch();
};

// Some linker-generated symbols need to be created as
// DefinedRegular symbols.
template <class ELFT> struct ElfSym {
  // The content for __ehdr_start symbol.
  static DefinedRegular<ELFT> *EhdrStart;

  // The content for _etext and etext symbols.
  static DefinedRegular<ELFT> *Etext;
  static DefinedRegular<ELFT> *Etext2;

  // The content for _edata and edata symbols.
  static DefinedRegular<ELFT> *Edata;
  static DefinedRegular<ELFT> *Edata2;

  // The content for _end and end symbols.
  static DefinedRegular<ELFT> *End;
  static DefinedRegular<ELFT> *End2;

  // The content for _gp_disp/__gnu_local_gp symbols for MIPS target.
  static DefinedRegular<ELFT> *MipsGpDisp;
  static DefinedRegular<ELFT> *MipsLocalGp;
  static DefinedRegular<ELFT> *MipsGp;
};

// Out-of-class definitions of the ElfSym static members.
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::EhdrStart;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext2;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata2;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End2;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsGpDisp;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsLocalGp;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsGp;

// A real symbol object, SymbolBody, is usually stored within a Symbol. There's
// always one Symbol for each symbol name. The resolver updates the SymbolBody
// stored in the Body field of this object as it resolves symbols. Symbol also
// holds computed properties of symbol names.
struct Symbol {
  // Symbol binding. This is on the Symbol to track changes during resolution.
  // In particular:
  // An undefined weak is still weak when it resolves to a shared library.
  // An undefined weak will not fetch archive members, but we have to remember
  // it is weak.
  uint8_t Binding;

  // Version definition index.
  uint16_t VersionId;

  // Symbol visibility. This is the computed minimum visibility of all
  // observed non-DSO symbols.
  unsigned Visibility : 2;

  // True if the symbol was used for linking and thus need to be added to the
  // output file's symbol table. This is true for all symbols except for
  // unreferenced DSO symbols and bitcode symbols that are unreferenced except
  // by other bitcode objects.
  unsigned IsUsedInRegularObj : 1;

  // If this flag is true and the symbol has protected or default visibility, it
  // will appear in .dynsym. This flag is set by interposable DSO symbols in
  // executables, by most symbols in DSOs and executables built with
  // --export-dynamic, and by dynamic lists.
  unsigned ExportDynamic : 1;

  // True if this symbol is specified by --trace-symbol option.
  unsigned Traced : 1;

  // This symbol version was found in a version script.
  unsigned InVersionScript : 1;

  bool includeInDynsym() const;
  bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; }

  // This field is used to store the Symbol's SymbolBody. This instantiation of
  // AlignedCharArrayUnion gives us a struct with a char array field that is
  // large and aligned enough to store any derived class of SymbolBody. We
  // assume that the size and alignment of ELF64LE symbols is sufficient for any
  // ELFT, and we verify this with the static_asserts in replaceBody.
  llvm::AlignedCharArrayUnion<
      DefinedCommon, DefinedRegular<llvm::object::ELF64LE>, DefinedSynthetic,
      Undefined<llvm::object::ELF64LE>, SharedSymbol<llvm::object::ELF64LE>,
      LazyArchive, LazyObject>
      Body;

  // Views the raw Body storage as the SymbolBody that replaceBody()
  // constructed in it.
  SymbolBody *body() { return reinterpret_cast<SymbolBody *>(Body.buffer); }
  const SymbolBody *body() const { return const_cast<Symbol *>(this)->body(); }
};

void printTraceSymbol(Symbol *Sym);

// Constructs a T in S->Body with placement new, forwarding Arg to T's
// constructor. The previous body is overwritten without running a
// destructor. The static_asserts check that T fits the storage; the assert
// only compiles if T* converts to SymbolBody*.
template <typename T, typename... ArgT>
void replaceBody(Symbol *S, ArgT &&... Arg) {
  static_assert(sizeof(T) <= sizeof(S->Body), "Body too small");
  static_assert(alignof(T) <= alignof(decltype(S->Body)),
                "Body not aligned enough");
  assert(static_cast<SymbolBody *>(static_cast<T *>(nullptr)) == nullptr &&
         "Not a SymbolBody");

  new (S->Body.buffer) T(std::forward<ArgT>(Arg)...);

  // Print out a log message if --trace-symbol was specified.
  // This is for debugging.
  if (S->Traced)
    printTraceSymbol(S);
}

// Recovers the enclosing Symbol by subtracting the offset of the Body member
// from this object's address. This is the inverse of Symbol::body(), so it is
// only meaningful for a body that lives inside a Symbol. The assert rejects
// local symbols -- presumably because they are not stored in a Symbol;
// confirm against the code that creates local symbols.
inline Symbol *SymbolBody::symbol() {
  assert(!isLocal());
  return reinterpret_cast<Symbol *>(reinterpret_cast<char *>(this) -
                                    offsetof(Symbol, Body));
}
} // namespace elf

std::string toString(const elf::SymbolBody &B);
} // namespace lld

#endif
diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp
new file mode 100644
index 000000000000..3c8a439ba308
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp
@@ -0,0 +1,1990 @@
//===- SyntheticSections.cpp ----------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains linker-synthesized sections. Currently,
// synthetic sections are created as either output sections or input sections,
// but we are rewriting code so that all synthetic sections are created as
// input sections.
+// +//===----------------------------------------------------------------------===// + +#include "SyntheticSections.h" +#include "Config.h" +#include "Error.h" +#include "InputFiles.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "Target.h" +#include "Threads.h" +#include "Writer.h" +#include "lld/Config/Version.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/xxhash.h" +#include <cstdlib> + +using namespace llvm; +using namespace llvm::dwarf; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +template <class ELFT> static std::vector<DefinedCommon *> getCommonSymbols() { + std::vector<DefinedCommon *> V; + for (Symbol *S : Symtab<ELFT>::X->getSymbols()) + if (auto *B = dyn_cast<DefinedCommon>(S->body())) + V.push_back(B); + return V; +} + +// Find all common symbols and allocate space for them. +template <class ELFT> InputSection<ELFT> *elf::createCommonSection() { + auto *Ret = make<InputSection<ELFT>>(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, 1, + ArrayRef<uint8_t>(), "COMMON"); + Ret->Live = true; + + // Sort the common symbols by alignment as an heuristic to pack them better. + std::vector<DefinedCommon *> Syms = getCommonSymbols<ELFT>(); + std::stable_sort(Syms.begin(), Syms.end(), + [](const DefinedCommon *A, const DefinedCommon *B) { + return A->Alignment > B->Alignment; + }); + + // Assign offsets to symbols. + size_t Size = 0; + size_t Alignment = 1; + for (DefinedCommon *Sym : Syms) { + Alignment = std::max<size_t>(Alignment, Sym->Alignment); + Size = alignTo(Size, Sym->Alignment); + + // Compute symbol offset relative to beginning of input section. 
+ Sym->Offset = Size; + Size += Sym->Size; + } + Ret->Alignment = Alignment; + Ret->Data = makeArrayRef<uint8_t>(nullptr, Size); + return Ret; +} + +// Returns an LLD version string. +static ArrayRef<uint8_t> getVersion() { + // Check LLD_VERSION first for ease of testing. + // You can get consitent output by using the environment variable. + // This is only for testing. + StringRef S = getenv("LLD_VERSION"); + if (S.empty()) + S = Saver.save(Twine("Linker: ") + getLLDVersion()); + + // +1 to include the terminating '\0'. + return {(const uint8_t *)S.data(), S.size() + 1}; +} + +// Creates a .comment section containing LLD version info. +// With this feature, you can identify LLD-generated binaries easily +// by "objdump -s -j .comment <file>". +// The returned object is a mergeable string section. +template <class ELFT> MergeInputSection<ELFT> *elf::createCommentSection() { + typename ELFT::Shdr Hdr = {}; + Hdr.sh_flags = SHF_MERGE | SHF_STRINGS; + Hdr.sh_type = SHT_PROGBITS; + Hdr.sh_entsize = 1; + Hdr.sh_addralign = 1; + + auto *Ret = make<MergeInputSection<ELFT>>(/*file=*/nullptr, &Hdr, ".comment"); + Ret->Data = getVersion(); + Ret->splitIntoPieces(); + return Ret; +} + +// .MIPS.abiflags section. 
+template <class ELFT> +MipsAbiFlagsSection<ELFT>::MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags) + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"), + Flags(Flags) {} + +template <class ELFT> void MipsAbiFlagsSection<ELFT>::writeTo(uint8_t *Buf) { + memcpy(Buf, &Flags, sizeof(Flags)); +} + +template <class ELFT> +MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() { + Elf_Mips_ABIFlags Flags = {}; + bool Create = false; + + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + if (!Sec->Live || Sec->Type != SHT_MIPS_ABIFLAGS) + continue; + Sec->Live = false; + Create = true; + + std::string Filename = toString(Sec->getFile()); + const size_t Size = Sec->Data.size(); + // Older version of BFD (such as the default FreeBSD linker) concatenate + // .MIPS.abiflags instead of merging. To allow for this case (or potential + // zero padding) we ignore everything after the first Elf_Mips_ABIFlags + if (Size < sizeof(Elf_Mips_ABIFlags)) { + error(Filename + ": invalid size of .MIPS.abiflags section: got " + + Twine(Size) + " instead of " + Twine(sizeof(Elf_Mips_ABIFlags))); + return nullptr; + } + auto *S = reinterpret_cast<const Elf_Mips_ABIFlags *>(Sec->Data.data()); + if (S->version != 0) { + error(Filename + ": unexpected .MIPS.abiflags version " + + Twine(S->version)); + return nullptr; + } + + // LLD checks ISA compatibility in getMipsEFlags(). Here we just + // select the highest number of ISA/Rev/Ext. 
+ Flags.isa_level = std::max(Flags.isa_level, S->isa_level); + Flags.isa_rev = std::max(Flags.isa_rev, S->isa_rev); + Flags.isa_ext = std::max(Flags.isa_ext, S->isa_ext); + Flags.gpr_size = std::max(Flags.gpr_size, S->gpr_size); + Flags.cpr1_size = std::max(Flags.cpr1_size, S->cpr1_size); + Flags.cpr2_size = std::max(Flags.cpr2_size, S->cpr2_size); + Flags.ases |= S->ases; + Flags.flags1 |= S->flags1; + Flags.flags2 |= S->flags2; + Flags.fp_abi = elf::getMipsFpAbiFlag(Flags.fp_abi, S->fp_abi, Filename); + }; + + if (Create) + return make<MipsAbiFlagsSection<ELFT>>(Flags); + return nullptr; +} + +// .MIPS.options section. +template <class ELFT> +MipsOptionsSection<ELFT>::MipsOptionsSection(Elf_Mips_RegInfo Reginfo) + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"), + Reginfo(Reginfo) {} + +template <class ELFT> void MipsOptionsSection<ELFT>::writeTo(uint8_t *Buf) { + auto *Options = reinterpret_cast<Elf_Mips_Options *>(Buf); + Options->kind = ODK_REGINFO; + Options->size = getSize(); + + if (!Config->Relocatable) + Reginfo.ri_gp_value = In<ELFT>::MipsGot->getGp(); + memcpy(Buf + sizeof(Elf_Mips_Options), &Reginfo, sizeof(Reginfo)); +} + +template <class ELFT> +MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() { + // N64 ABI only. 
+ if (!ELFT::Is64Bits) + return nullptr; + + Elf_Mips_RegInfo Reginfo = {}; + bool Create = false; + + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + if (!Sec->Live || Sec->Type != SHT_MIPS_OPTIONS) + continue; + Sec->Live = false; + Create = true; + + std::string Filename = toString(Sec->getFile()); + ArrayRef<uint8_t> D = Sec->Data; + + while (!D.empty()) { + if (D.size() < sizeof(Elf_Mips_Options)) { + error(Filename + ": invalid size of .MIPS.options section"); + break; + } + + auto *Opt = reinterpret_cast<const Elf_Mips_Options *>(D.data()); + if (Opt->kind == ODK_REGINFO) { + if (Config->Relocatable && Opt->getRegInfo().ri_gp_value) + error(Filename + ": unsupported non-zero ri_gp_value"); + Reginfo.ri_gprmask |= Opt->getRegInfo().ri_gprmask; + Sec->getFile()->MipsGp0 = Opt->getRegInfo().ri_gp_value; + break; + } + + if (!Opt->size) + fatal(Filename + ": zero option descriptor size"); + D = D.slice(Opt->size); + } + }; + + if (Create) + return make<MipsOptionsSection<ELFT>>(Reginfo); + return nullptr; +} + +// MIPS .reginfo section. +template <class ELFT> +MipsReginfoSection<ELFT>::MipsReginfoSection(Elf_Mips_RegInfo Reginfo) + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"), + Reginfo(Reginfo) {} + +template <class ELFT> void MipsReginfoSection<ELFT>::writeTo(uint8_t *Buf) { + if (!Config->Relocatable) + Reginfo.ri_gp_value = In<ELFT>::MipsGot->getGp(); + memcpy(Buf, &Reginfo, sizeof(Reginfo)); +} + +template <class ELFT> +MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() { + // Section should be alive for O32 and N32 ABIs only. 
+ if (ELFT::Is64Bits) + return nullptr; + + Elf_Mips_RegInfo Reginfo = {}; + bool Create = false; + + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + if (!Sec->Live || Sec->Type != SHT_MIPS_REGINFO) + continue; + Sec->Live = false; + Create = true; + + if (Sec->Data.size() != sizeof(Elf_Mips_RegInfo)) { + error(toString(Sec->getFile()) + ": invalid size of .reginfo section"); + return nullptr; + } + auto *R = reinterpret_cast<const Elf_Mips_RegInfo *>(Sec->Data.data()); + if (Config->Relocatable && R->ri_gp_value) + error(toString(Sec->getFile()) + ": unsupported non-zero ri_gp_value"); + + Reginfo.ri_gprmask |= R->ri_gprmask; + Sec->getFile()->MipsGp0 = R->ri_gp_value; + }; + + if (Create) + return make<MipsReginfoSection<ELFT>>(Reginfo); + return nullptr; +} + +template <class ELFT> InputSection<ELFT> *elf::createInterpSection() { + auto *Ret = make<InputSection<ELFT>>(SHF_ALLOC, SHT_PROGBITS, 1, + ArrayRef<uint8_t>(), ".interp"); + Ret->Live = true; + + // StringSaver guarantees that the returned string ends with '\0'. 
+ StringRef S = Saver.save(Config->DynamicLinker); + Ret->Data = {(const uint8_t *)S.data(), S.size() + 1}; + return Ret; +} + +static size_t getHashSize() { + switch (Config->BuildId) { + case BuildIdKind::Fast: + return 8; + case BuildIdKind::Md5: + case BuildIdKind::Uuid: + return 16; + case BuildIdKind::Sha1: + return 20; + case BuildIdKind::Hexstring: + return Config->BuildIdVector.size(); + default: + llvm_unreachable("unknown BuildIdKind"); + } +} + +template <class ELFT> +BuildIdSection<ELFT>::BuildIdSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_NOTE, 1, ".note.gnu.build-id"), + HashSize(getHashSize()) {} + +template <class ELFT> void BuildIdSection<ELFT>::writeTo(uint8_t *Buf) { + const endianness E = ELFT::TargetEndianness; + write32<E>(Buf, 4); // Name size + write32<E>(Buf + 4, HashSize); // Content size + write32<E>(Buf + 8, NT_GNU_BUILD_ID); // Type + memcpy(Buf + 12, "GNU", 4); // Name string + HashBuf = Buf + 16; +} + +// Split one uint8 array into small pieces of uint8 arrays. +static std::vector<ArrayRef<uint8_t>> split(ArrayRef<uint8_t> Arr, + size_t ChunkSize) { + std::vector<ArrayRef<uint8_t>> Ret; + while (Arr.size() > ChunkSize) { + Ret.push_back(Arr.take_front(ChunkSize)); + Arr = Arr.drop_front(ChunkSize); + } + if (!Arr.empty()) + Ret.push_back(Arr); + return Ret; +} + +// Computes a hash value of Data using a given hash function. +// In order to utilize multiple cores, we first split data into 1MB +// chunks, compute a hash for each chunk, and then compute a hash value +// of the hash values. +template <class ELFT> +void BuildIdSection<ELFT>::computeHash( + llvm::ArrayRef<uint8_t> Data, + std::function<void(uint8_t *Dest, ArrayRef<uint8_t> Arr)> HashFn) { + std::vector<ArrayRef<uint8_t>> Chunks = split(Data, 1024 * 1024); + std::vector<uint8_t> Hashes(Chunks.size() * HashSize); + + // Compute hash values. 
+ forLoop(0, Chunks.size(), + [&](size_t I) { HashFn(Hashes.data() + I * HashSize, Chunks[I]); }); + + // Write to the final output buffer. + HashFn(HashBuf, Hashes); +} + +template <class ELFT> +void BuildIdSection<ELFT>::writeBuildId(ArrayRef<uint8_t> Buf) { + switch (Config->BuildId) { + case BuildIdKind::Fast: + computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) { + write64le(Dest, xxHash64(toStringRef(Arr))); + }); + break; + case BuildIdKind::Md5: + computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) { + memcpy(Dest, MD5::hash(Arr).data(), 16); + }); + break; + case BuildIdKind::Sha1: + computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) { + memcpy(Dest, SHA1::hash(Arr).data(), 20); + }); + break; + case BuildIdKind::Uuid: + if (getRandomBytes(HashBuf, HashSize)) + error("entropy source failure"); + break; + case BuildIdKind::Hexstring: + memcpy(HashBuf, Config->BuildIdVector.data(), Config->BuildIdVector.size()); + break; + default: + llvm_unreachable("unknown BuildIdKind"); + } +} + +template <class ELFT> +GotSection<ELFT>::GotSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + Target->GotEntrySize, ".got") {} + +template <class ELFT> void GotSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.GotIndex = NumEntries; + ++NumEntries; +} + +template <class ELFT> bool GotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) { + if (Sym.GlobalDynIndex != -1U) + return false; + Sym.GlobalDynIndex = NumEntries; + // Global Dynamic TLS entries take two GOT slots. + NumEntries += 2; + return true; +} + +// Reserves TLS entries for a TLS module ID and a TLS block offset. +// In total it takes two GOT slots. 
+template <class ELFT> bool GotSection<ELFT>::addTlsIndex() { + if (TlsIndexOff != uint32_t(-1)) + return false; + TlsIndexOff = NumEntries * sizeof(uintX_t); + NumEntries += 2; + return true; +} + +template <class ELFT> +typename GotSection<ELFT>::uintX_t +GotSection<ELFT>::getGlobalDynAddr(const SymbolBody &B) const { + return this->getVA() + B.GlobalDynIndex * sizeof(uintX_t); +} + +template <class ELFT> +typename GotSection<ELFT>::uintX_t +GotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const { + return B.GlobalDynIndex * sizeof(uintX_t); +} + +template <class ELFT> void GotSection<ELFT>::finalize() { + Size = NumEntries * sizeof(uintX_t); +} + +template <class ELFT> bool GotSection<ELFT>::empty() const { + // If we have a relocation that is relative to GOT (such as GOTOFFREL), + // we need to emit a GOT even if it's empty. + return NumEntries == 0 && !HasGotOffRel; +} + +template <class ELFT> void GotSection<ELFT>::writeTo(uint8_t *Buf) { + this->relocate(Buf, Buf + Size); +} + +template <class ELFT> +MipsGotSection<ELFT>::MipsGotSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, + SHT_PROGBITS, Target->GotEntrySize, ".got") {} + +template <class ELFT> +void MipsGotSection<ELFT>::addEntry(SymbolBody &Sym, uintX_t Addend, + RelExpr Expr) { + // For "true" local symbols which can be referenced from the same module + // only compiler creates two instructions for address loading: + // + // lw $8, 0($gp) # R_MIPS_GOT16 + // addi $8, $8, 0 # R_MIPS_LO16 + // + // The first instruction loads high 16 bits of the symbol address while + // the second adds an offset. That allows to reduce number of required + // GOT entries because only one global offset table entry is necessary + // for every 64 KBytes of local data. So for local symbols we need to + // allocate number of GOT entries to hold all required "page" addresses. + // + // All global symbols (hidden and regular) considered by compiler uniformly. 
+ // It always generates a single `lw` instruction and R_MIPS_GOT16 relocation + // to load address of the symbol. So for each such symbol we need to + // allocate dedicated GOT entry to store its address. + // + // If a symbol is preemptible we need help of dynamic linker to get its + // final address. The corresponding GOT entries are allocated in the + // "global" part of GOT. Entries for non preemptible global symbol allocated + // in the "local" part of GOT. + // + // See "Global Offset Table" in Chapter 5: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (Expr == R_MIPS_GOT_LOCAL_PAGE) { + // At this point we do not know final symbol value so to reduce number + // of allocated GOT entries do the following trick. Save all output + // sections referenced by GOT relocations. Then later in the `finalize` + // method calculate number of "pages" required to cover all saved output + // section and allocate appropriate number of GOT entries. + PageIndexMap.insert({cast<DefinedRegular<ELFT>>(&Sym)->Section->OutSec, 0}); + return; + } + if (Sym.isTls()) { + // GOT entries created for MIPS TLS relocations behave like + // almost GOT entries from other ABIs. They go to the end + // of the global offset table. + Sym.GotIndex = TlsEntries.size(); + TlsEntries.push_back(&Sym); + return; + } + auto AddEntry = [&](SymbolBody &S, uintX_t A, GotEntries &Items) { + if (S.isInGot() && !A) + return; + size_t NewIndex = Items.size(); + if (!EntryIndexMap.insert({{&S, A}, NewIndex}).second) + return; + Items.emplace_back(&S, A); + if (!A) + S.GotIndex = NewIndex; + }; + if (Sym.isPreemptible()) { + // Ignore addends for preemptible symbols. They got single GOT entry anyway. + AddEntry(Sym, 0, GlobalEntries); + Sym.IsInGlobalMipsGot = true; + } else if (Expr == R_MIPS_GOT_OFF32) { + AddEntry(Sym, Addend, LocalEntries32); + Sym.Is32BitMipsGot = true; + } else { + // Hold local GOT entries accessed via a 16-bit index separately. 
+ // That allows to write them in the beginning of the GOT and keep + // their indexes as less as possible to escape relocation's overflow. + AddEntry(Sym, Addend, LocalEntries); + } +} + +template <class ELFT> +bool MipsGotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) { + if (Sym.GlobalDynIndex != -1U) + return false; + Sym.GlobalDynIndex = TlsEntries.size(); + // Global Dynamic TLS entries take two GOT slots. + TlsEntries.push_back(nullptr); + TlsEntries.push_back(&Sym); + return true; +} + +// Reserves TLS entries for a TLS module ID and a TLS block offset. +// In total it takes two GOT slots. +template <class ELFT> bool MipsGotSection<ELFT>::addTlsIndex() { + if (TlsIndexOff != uint32_t(-1)) + return false; + TlsIndexOff = TlsEntries.size() * sizeof(uintX_t); + TlsEntries.push_back(nullptr); + TlsEntries.push_back(nullptr); + return true; +} + +static uint64_t getMipsPageAddr(uint64_t Addr) { + return (Addr + 0x8000) & ~0xffff; +} + +static uint64_t getMipsPageCount(uint64_t Size) { + return (Size + 0xfffe) / 0xffff + 1; +} + +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t +MipsGotSection<ELFT>::getPageEntryOffset(const SymbolBody &B, + uintX_t Addend) const { + const OutputSectionBase *OutSec = + cast<DefinedRegular<ELFT>>(&B)->Section->OutSec; + uintX_t SecAddr = getMipsPageAddr(OutSec->Addr); + uintX_t SymAddr = getMipsPageAddr(B.getVA<ELFT>(Addend)); + uintX_t Index = PageIndexMap.lookup(OutSec) + (SymAddr - SecAddr) / 0xffff; + assert(Index < PageEntriesNum); + return (HeaderEntriesNum + Index) * sizeof(uintX_t); +} + +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t +MipsGotSection<ELFT>::getBodyEntryOffset(const SymbolBody &B, + uintX_t Addend) const { + // Calculate offset of the GOT entries block: TLS, global, local. 
+ uintX_t Index = HeaderEntriesNum + PageEntriesNum; + if (B.isTls()) + Index += LocalEntries.size() + LocalEntries32.size() + GlobalEntries.size(); + else if (B.IsInGlobalMipsGot) + Index += LocalEntries.size() + LocalEntries32.size(); + else if (B.Is32BitMipsGot) + Index += LocalEntries.size(); + // Calculate offset of the GOT entry in the block. + if (B.isInGot()) + Index += B.GotIndex; + else { + auto It = EntryIndexMap.find({&B, Addend}); + assert(It != EntryIndexMap.end()); + Index += It->second; + } + return Index * sizeof(uintX_t); +} + +// Returns the byte offset of the first TLS entry: TLS entries are placed +// after the header, page, local and global entries. +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t +MipsGotSection<ELFT>::getTlsOffset() const { + return (getLocalEntriesNum() + GlobalEntries.size()) * sizeof(uintX_t); +} + +// Returns the byte offset of B's slot pair computed from B.GlobalDynIndex +// alone. GlobalDynIndex is an index into TlsEntries, so the result is +// relative to the first TLS entry rather than to the start of the GOT. +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t +MipsGotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const { + return B.GlobalDynIndex * sizeof(uintX_t); +} + +// Returns the symbol of the first entry in GlobalEntries, or nullptr if +// there are no global entries. +template <class ELFT> +const SymbolBody *MipsGotSection<ELFT>::getFirstGlobalEntry() const { + return GlobalEntries.empty() ? nullptr : GlobalEntries.front().first; +} + +// Returns the number of entries placed before the global entries: header, +// page, LocalEntries and LocalEntries32. +template <class ELFT> +unsigned MipsGotSection<ELFT>::getLocalEntriesNum() const { + return HeaderEntriesNum + PageEntriesNum + LocalEntries.size() + + LocalEntries32.size(); +} + +// Assigns every output section in PageIndexMap the index of its first page +// entry, accumulates PageEntriesNum and computes the section size. +template <class ELFT> void MipsGotSection<ELFT>::finalize() { + PageEntriesNum = 0; + for (std::pair<const OutputSectionBase *, size_t> &P : PageIndexMap) { + // For each output section referenced by GOT page relocations calculate + // and save into PageIndexMap an upper bound of MIPS GOT entries required + // to store page addresses of local symbols. We assume the worst case - + // each 64kb page of the output section has at least one GOT relocation + // against it. And take into account the case when the section intersects + // page boundaries. 
+ P.second = PageEntriesNum; + PageEntriesNum += getMipsPageCount(P.first->Size); + } + Size = (getLocalEntriesNum() + GlobalEntries.size() + TlsEntries.size()) * + sizeof(uintX_t); +} + +template <class ELFT> bool MipsGotSection<ELFT>::empty() const { + // We add the .got section to the result for dynamic MIPS target because + // its address and properties are mentioned in the .dynamic section. + return Config->Relocatable; +} + +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t MipsGotSection<ELFT>::getGp() const { + return ElfSym<ELFT>::MipsGp->template getVA<ELFT>(0); +} + +template <class ELFT> +static void writeUint(uint8_t *Buf, typename ELFT::uint Val) { + typedef typename ELFT::uint uintX_t; + write<uintX_t, ELFT::TargetEndianness, sizeof(uintX_t)>(Buf, Val); +} + +template <class ELFT> void MipsGotSection<ELFT>::writeTo(uint8_t *Buf) { + // Set the MSB of the second GOT slot. This is not required by any + // MIPS ABI documentation, though. + // + // There is a comment in glibc saying that "The MSB of got[1] of a + // gnu object is set to identify gnu objects," and in GNU gold it + // says "the second entry will be used by some runtime loaders". + // But how this field is being used is unclear. + // + // We are not really willing to mimic other linkers behaviors + // without understanding why they do that, but because all files + // generated by GNU tools have this special GOT value, and because + // we've been doing this for years, it is probably a safe bet to + // keep doing this for now. We really need to revisit this to see + // if we had to do this. + auto *P = reinterpret_cast<typename ELFT::Off *>(Buf); + P[1] = uintX_t(1) << (ELFT::Is64Bits ? 63 : 31); + Buf += HeaderEntriesNum * sizeof(uintX_t); + // Write 'page address' entries to the local part of the GOT. 
+ for (std::pair<const OutputSectionBase *, size_t> &L : PageIndexMap) { + size_t PageCount = getMipsPageCount(L.first->Size); + uintX_t FirstPageAddr = getMipsPageAddr(L.first->Addr); + for (size_t PI = 0; PI < PageCount; ++PI) { + uint8_t *Entry = Buf + (L.second + PI) * sizeof(uintX_t); + writeUint<ELFT>(Entry, FirstPageAddr + PI * 0x10000); + } + } + Buf += PageEntriesNum * sizeof(uintX_t); + auto AddEntry = [&](const GotEntry &SA) { + uint8_t *Entry = Buf; + Buf += sizeof(uintX_t); + const SymbolBody *Body = SA.first; + uintX_t VA = Body->template getVA<ELFT>(SA.second); + writeUint<ELFT>(Entry, VA); + }; + std::for_each(std::begin(LocalEntries), std::end(LocalEntries), AddEntry); + std::for_each(std::begin(LocalEntries32), std::end(LocalEntries32), AddEntry); + std::for_each(std::begin(GlobalEntries), std::end(GlobalEntries), AddEntry); + // Initialize TLS-related GOT entries. If the entry has a corresponding + // dynamic relocations, leave it initialized by zero. Write down adjusted + // TLS symbol's values otherwise. To calculate the adjustments use offsets + // for thread-local storage. 
+ // https://www.linux-mips.org/wiki/NPTL + if (TlsIndexOff != -1U && !Config->Pic) + writeUint<ELFT>(Buf + TlsIndexOff, 1); + for (const SymbolBody *B : TlsEntries) { + if (!B || B->isPreemptible()) + continue; + uintX_t VA = B->getVA<ELFT>(); + if (B->GotIndex != -1U) { + uint8_t *Entry = Buf + B->GotIndex * sizeof(uintX_t); + writeUint<ELFT>(Entry, VA - 0x7000); + } + if (B->GlobalDynIndex != -1U) { + uint8_t *Entry = Buf + B->GlobalDynIndex * sizeof(uintX_t); + writeUint<ELFT>(Entry, 1); + Entry += sizeof(uintX_t); + writeUint<ELFT>(Entry, VA - 0x8000); + } + } +} + +template <class ELFT> +GotPltSection<ELFT>::GotPltSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + Target->GotPltEntrySize, ".got.plt") {} + +// Appends Sym to the section and records its slot index in Sym.GotPltIndex. +// The index counts the target's reserved header entries. +template <class ELFT> void GotPltSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.GotPltIndex = Target->GotPltHeaderEntriesNum + Entries.size(); + Entries.push_back(&Sym); +} + +// Returns the section size in bytes: the header entries plus one entry per +// added symbol, each Target->GotPltEntrySize bytes wide. +template <class ELFT> size_t GotPltSection<ELFT>::getSize() const { + return (Target->GotPltHeaderEntriesNum + Entries.size()) * + Target->GotPltEntrySize; +} + +// Writes the target-specific header followed by one entry per symbol. +// The cursor advances by Target->GotPltEntrySize, the same entry size that +// getSize() and the header skip use, so the written layout matches the +// reserved size even when that entry size differs from sizeof(uintX_t). +template <class ELFT> void GotPltSection<ELFT>::writeTo(uint8_t *Buf) { + Target->writeGotPltHeader(Buf); + Buf += Target->GotPltHeaderEntriesNum * Target->GotPltEntrySize; + for (const SymbolBody *B : Entries) { + Target->writeGotPlt(Buf, *B); + Buf += Target->GotPltEntrySize; + } +} + +// On ARM the IgotPltSection is part of the GotSection, on other Targets it is +// part of the .got.plt +template <class ELFT> +IgotPltSection<ELFT>::IgotPltSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + Target->GotPltEntrySize, + Config->EMachine == EM_ARM ? 
".got" : ".got.plt") { +} + +// Appends Sym, marks it with IsInIgot and records its index in +// Sym.GotPltIndex. No header entries are counted for this section. +template <class ELFT> void IgotPltSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.IsInIgot = true; + Sym.GotPltIndex = Entries.size(); + Entries.push_back(&Sym); +} + +// Returns the section size in bytes: one Target->GotPltEntrySize-wide entry +// per added symbol. +template <class ELFT> size_t IgotPltSection<ELFT>::getSize() const { + return Entries.size() * Target->GotPltEntrySize; +} + +// Writes one entry per symbol. The cursor advances by +// Target->GotPltEntrySize so that the layout agrees with getSize(). +template <class ELFT> void IgotPltSection<ELFT>::writeTo(uint8_t *Buf) { + for (const SymbolBody *B : Entries) { + Target->writeIgotPlt(Buf, *B); + Buf += Target->GotPltEntrySize; + } +} + +template <class ELFT> +StringTableSection<ELFT>::StringTableSection(StringRef Name, bool Dynamic) + : SyntheticSection<ELFT>(Dynamic ? (uintX_t)SHF_ALLOC : 0, SHT_STRTAB, 1, + Name), + Dynamic(Dynamic) {} + +// Adds a string to the string table. If HashIt is true we hash and check for +// duplicates. It is optional because the name of global symbols are already +// uniqued and hashing them again has a big cost for a small value: uniquing +// them with some other string that happens to be the same. +// Returns the offset of the string in the table. +template <class ELFT> +unsigned StringTableSection<ELFT>::addString(StringRef S, bool HashIt) { + if (HashIt) { + auto R = StringMap.insert(std::make_pair(S, this->Size)); + if (!R.second) + return R.first->second; + } + unsigned Ret = this->Size; + this->Size = this->Size + S.size() + 1; + Strings.push_back(S); + return Ret; +} + +// Copies the strings in insertion order, leaving one byte after each of them +// untouched as the terminator. +template <class ELFT> void StringTableSection<ELFT>::writeTo(uint8_t *Buf) { + // ELF string tables start with NUL byte, so advance the pointer by one. + ++Buf; + for (StringRef S : Strings) { + memcpy(Buf, S.data(), S.size()); + Buf += S.size() + 1; + } +} + +// Returns the number of version definition entries. Because the first entry +// is for the version definition itself, it is the number of versioned symbols +// plus one. Note that we don't support multiple versions yet. 
+static unsigned getVerDefNum() { return Config->VersionDefinitions.size() + 1; } + +template <class ELFT> +DynamicSection<ELFT>::DynamicSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, + sizeof(uintX_t), ".dynamic") { + this->Entsize = ELFT::Is64Bits ? 16 : 8; + // .dynamic section is not writable on MIPS. + // See "Special Section" in Chapter 4 in the following document: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (Config->EMachine == EM_MIPS) + this->Flags = SHF_ALLOC; + + addEntries(); +} + +// There are some dynamic entries that don't depend on other sections. +// Such entries can be set early. +template <class ELFT> void DynamicSection<ELFT>::addEntries() { + // Add strings to .dynstr early so that .dynstr's size will be + // fixed early. + for (StringRef S : Config->AuxiliaryList) + add({DT_AUXILIARY, In<ELFT>::DynStrTab->addString(S)}); + if (!Config->RPath.empty()) + add({Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH, + In<ELFT>::DynStrTab->addString(Config->RPath)}); + for (SharedFile<ELFT> *F : Symtab<ELFT>::X->getSharedFiles()) + if (F->isNeeded()) + add({DT_NEEDED, In<ELFT>::DynStrTab->addString(F->getSoName())}); + if (!Config->SoName.empty()) + add({DT_SONAME, In<ELFT>::DynStrTab->addString(Config->SoName)}); + + // Set DT_FLAGS and DT_FLAGS_1. + uint32_t DtFlags = 0; + uint32_t DtFlags1 = 0; + if (Config->Bsymbolic) + DtFlags |= DF_SYMBOLIC; + if (Config->ZNodelete) + DtFlags1 |= DF_1_NODELETE; + if (Config->ZNow) { + DtFlags |= DF_BIND_NOW; + DtFlags1 |= DF_1_NOW; + } + if (Config->ZOrigin) { + DtFlags |= DF_ORIGIN; + DtFlags1 |= DF_1_ORIGIN; + } + + if (DtFlags) + add({DT_FLAGS, DtFlags}); + if (DtFlags1) + add({DT_FLAGS_1, DtFlags1}); + + if (!Config->Shared && !Config->Relocatable) + add({DT_DEBUG, (uint64_t)0}); +} + +// Add remaining entries to complete .dynamic contents. +template <class ELFT> void DynamicSection<ELFT>::finalize() { + if (this->Size) + return; // Already finalized. 
+ + this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; + if (In<ELFT>::RelaDyn->OutSec->Size > 0) { + bool IsRela = Config->Rela; + add({IsRela ? DT_RELA : DT_REL, In<ELFT>::RelaDyn}); + add({IsRela ? DT_RELASZ : DT_RELSZ, In<ELFT>::RelaDyn->OutSec->Size}); + add({IsRela ? DT_RELAENT : DT_RELENT, + uintX_t(IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel))}); + + // MIPS dynamic loader does not support RELCOUNT tag. + // The problem is in the tight relation between dynamic + // relocations and GOT. So do not emit this tag on MIPS. + if (Config->EMachine != EM_MIPS) { + size_t NumRelativeRels = In<ELFT>::RelaDyn->getRelativeRelocCount(); + if (Config->ZCombreloc && NumRelativeRels) + add({IsRela ? DT_RELACOUNT : DT_RELCOUNT, NumRelativeRels}); + } + } + if (In<ELFT>::RelaPlt->OutSec->Size > 0) { + add({DT_JMPREL, In<ELFT>::RelaPlt}); + add({DT_PLTRELSZ, In<ELFT>::RelaPlt->OutSec->Size}); + add({Config->EMachine == EM_MIPS ? DT_MIPS_PLTGOT : DT_PLTGOT, + In<ELFT>::GotPlt}); + add({DT_PLTREL, uint64_t(Config->Rela ? 
DT_RELA : DT_REL)}); + } + + add({DT_SYMTAB, In<ELFT>::DynSymTab}); + add({DT_SYMENT, sizeof(Elf_Sym)}); + add({DT_STRTAB, In<ELFT>::DynStrTab}); + add({DT_STRSZ, In<ELFT>::DynStrTab->getSize()}); + if (In<ELFT>::GnuHashTab) + add({DT_GNU_HASH, In<ELFT>::GnuHashTab}); + if (In<ELFT>::HashTab) + add({DT_HASH, In<ELFT>::HashTab}); + + if (Out<ELFT>::PreinitArray) { + add({DT_PREINIT_ARRAY, Out<ELFT>::PreinitArray}); + add({DT_PREINIT_ARRAYSZ, Out<ELFT>::PreinitArray, Entry::SecSize}); + } + if (Out<ELFT>::InitArray) { + add({DT_INIT_ARRAY, Out<ELFT>::InitArray}); + add({DT_INIT_ARRAYSZ, Out<ELFT>::InitArray, Entry::SecSize}); + } + if (Out<ELFT>::FiniArray) { + add({DT_FINI_ARRAY, Out<ELFT>::FiniArray}); + add({DT_FINI_ARRAYSZ, Out<ELFT>::FiniArray, Entry::SecSize}); + } + + if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Init)) + add({DT_INIT, B}); + if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Fini)) + add({DT_FINI, B}); + + bool HasVerNeed = In<ELFT>::VerNeed->getNeedNum() != 0; + if (HasVerNeed || In<ELFT>::VerDef) + add({DT_VERSYM, In<ELFT>::VerSym}); + if (In<ELFT>::VerDef) { + add({DT_VERDEF, In<ELFT>::VerDef}); + add({DT_VERDEFNUM, getVerDefNum()}); + } + if (HasVerNeed) { + add({DT_VERNEED, In<ELFT>::VerNeed}); + add({DT_VERNEEDNUM, In<ELFT>::VerNeed->getNeedNum()}); + } + + if (Config->EMachine == EM_MIPS) { + add({DT_MIPS_RLD_VERSION, 1}); + add({DT_MIPS_FLAGS, RHF_NOTPOT}); + add({DT_MIPS_BASE_ADDRESS, Config->ImageBase}); + add({DT_MIPS_SYMTABNO, In<ELFT>::DynSymTab->getNumSymbols()}); + add({DT_MIPS_LOCAL_GOTNO, In<ELFT>::MipsGot->getLocalEntriesNum()}); + if (const SymbolBody *B = In<ELFT>::MipsGot->getFirstGlobalEntry()) + add({DT_MIPS_GOTSYM, B->DynsymIndex}); + else + add({DT_MIPS_GOTSYM, In<ELFT>::DynSymTab->getNumSymbols()}); + add({DT_PLTGOT, In<ELFT>::MipsGot}); + if (In<ELFT>::MipsRldMap) + add({DT_MIPS_RLD_MAP, In<ELFT>::MipsRldMap}); + } + + this->OutSec->Entsize = this->Entsize; + this->OutSec->Link = this->Link; + + // +1 for DT_NULL 
+ this->Size = (Entries.size() + 1) * this->Entsize; +} + +template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *Buf) { + auto *P = reinterpret_cast<Elf_Dyn *>(Buf); + + for (const Entry &E : Entries) { + P->d_tag = E.Tag; + switch (E.Kind) { + case Entry::SecAddr: + P->d_un.d_ptr = E.OutSec->Addr; + break; + case Entry::InSecAddr: + P->d_un.d_ptr = E.InSec->OutSec->Addr + E.InSec->OutSecOff; + break; + case Entry::SecSize: + P->d_un.d_val = E.OutSec->Size; + break; + case Entry::SymAddr: + P->d_un.d_ptr = E.Sym->template getVA<ELFT>(); + break; + case Entry::PlainInt: + P->d_un.d_val = E.Val; + break; + } + ++P; + } +} + +template <class ELFT> +typename ELFT::uint DynamicReloc<ELFT>::getOffset() const { + if (OutputSec) + return OutputSec->Addr + OffsetInSec; + return InputSec->OutSec->Addr + InputSec->getOffset(OffsetInSec); +} + +template <class ELFT> +typename ELFT::uint DynamicReloc<ELFT>::getAddend() const { + if (UseSymVA) + return Sym->getVA<ELFT>(Addend); + return Addend; +} + +template <class ELFT> uint32_t DynamicReloc<ELFT>::getSymIndex() const { + if (Sym && !UseSymVA) + return Sym->DynsymIndex; + return 0; +} + +template <class ELFT> +RelocationSection<ELFT>::RelocationSection(StringRef Name, bool Sort) + : SyntheticSection<ELFT>(SHF_ALLOC, Config->Rela ? SHT_RELA : SHT_REL, + sizeof(uintX_t), Name), + Sort(Sort) { + this->Entsize = Config->Rela ? 
sizeof(Elf_Rela) : sizeof(Elf_Rel); +} + +// Appends Reloc to the section and counts it in NumRelativeRelocs when its +// type is the target's relative relocation type. +template <class ELFT> +void RelocationSection<ELFT>::addReloc(const DynamicReloc<ELFT> &Reloc) { + if (Reloc.Type == Target->RelativeRel) + ++NumRelativeRelocs; + Relocs.push_back(Reloc); +} + +// Ordering predicate for written relocation records: records of the target's +// relative type sort before all others; two records of the same group are +// ordered by symbol index. +template <class ELFT, class RelTy> +static bool compRelocations(const RelTy &A, const RelTy &B) { + bool AIsRel = A.getType(Config->Mips64EL) == Target->RelativeRel; + bool BIsRel = B.getType(Config->Mips64EL) == Target->RelativeRel; + if (AIsRel != BIsRel) + return AIsRel; + + return A.getSymbol(Config->Mips64EL) < B.getSymbol(Config->Mips64EL); +} + +// Writes one record per relocation in insertion order and, if Sort is set, +// stable-sorts the written records with compRelocations. +template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) { + uint8_t *BufBegin = Buf; + for (const DynamicReloc<ELFT> &Rel : Relocs) { + auto *P = reinterpret_cast<Elf_Rela *>(Buf); + Buf += Config->Rela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); + + if (Config->Rela) + P->r_addend = Rel.getAddend(); + P->r_offset = Rel.getOffset(); + if (Config->EMachine == EM_MIPS && Rel.getInputSec() == In<ELFT>::MipsGot) + // Dynamic relocations against the MIPS GOT section deal with TLS entries + // allocated at the end of the GOT. We need to adjust the offset to take + // into account the 'local' and 'global' GOT entries. + P->r_offset += In<ELFT>::MipsGot->getTlsOffset(); + P->setSymbolAndType(Rel.getSymIndex(), Rel.Type, Config->Mips64EL); + } + + if (Sort) { + if (Config->Rela) + std::stable_sort((Elf_Rela *)BufBegin, + (Elf_Rela *)BufBegin + Relocs.size(), + compRelocations<ELFT, Elf_Rela>); + else + std::stable_sort((Elf_Rel *)BufBegin, (Elf_Rel *)BufBegin + Relocs.size(), + compRelocations<ELFT, Elf_Rel>); + } +} + +// Returns the size in bytes of the relocations added so far (Entsize times +// their count). +template <class ELFT> unsigned RelocationSection<ELFT>::getRelocOffset() { + return this->Entsize * Relocs.size(); +} + +// Links the section to In<ELFT>::DynSymTab when it exists and to +// In<ELFT>::SymTab otherwise. +template <class ELFT> void RelocationSection<ELFT>::finalize() { + this->Link = In<ELFT>::DynSymTab ? In<ELFT>::DynSymTab->OutSec->SectionIndex + : In<ELFT>::SymTab->OutSec->SectionIndex; + + // Set required output section properties. 
+ this->OutSec->Link = this->Link; + this->OutSec->Entsize = this->Entsize; +} + +template <class ELFT> +SymbolTableSection<ELFT>::SymbolTableSection( + StringTableSection<ELFT> &StrTabSec) + : SyntheticSection<ELFT>(StrTabSec.isDynamic() ? (uintX_t)SHF_ALLOC : 0, + StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, + sizeof(uintX_t), + StrTabSec.isDynamic() ? ".dynsym" : ".symtab"), + StrTabSec(StrTabSec) { + this->Entsize = sizeof(Elf_Sym); +} + +// Orders symbols according to their positions in the GOT, +// in compliance with MIPS ABI rules. +// See "Global Offset Table" in Chapter 5 in the following document +// for detailed description: +// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf +static bool sortMipsSymbols(const SymbolBody *L, const SymbolBody *R) { + // Sort entries related to non-local preemptible symbols by GOT indexes. + // All other entries go to the first part of GOT in arbitrary order. + bool LIsInLocalGot = !L->IsInGlobalMipsGot; + bool RIsInLocalGot = !R->IsInGlobalMipsGot; + if (LIsInLocalGot || RIsInLocalGot) + return !RIsInLocalGot; + return L->GotIndex < R->GotIndex; +} + +static uint8_t getSymbolBinding(SymbolBody *Body) { + Symbol *S = Body->symbol(); + if (Config->Relocatable) + return S->Binding; + uint8_t Visibility = S->Visibility; + if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED) + return STB_LOCAL; + if (Config->NoGnuUnique && S->Binding == STB_GNU_UNIQUE) + return STB_GLOBAL; + return S->Binding; +} + +template <class ELFT> void SymbolTableSection<ELFT>::finalize() { + this->OutSec->Link = this->Link = StrTabSec.OutSec->SectionIndex; + this->OutSec->Info = this->Info = NumLocals + 1; + this->OutSec->Entsize = this->Entsize; + + if (Config->Relocatable) { + size_t I = NumLocals; + for (const SymbolTableEntry &S : Symbols) + S.Symbol->DynsymIndex = ++I; + return; + } + + if (!StrTabSec.isDynamic()) { + std::stable_sort(Symbols.begin(), Symbols.end(), + [](const SymbolTableEntry &L, const SymbolTableEntry 
&R) { + return getSymbolBinding(L.Symbol) == STB_LOCAL && + getSymbolBinding(R.Symbol) != STB_LOCAL; + }); + return; + } + if (In<ELFT>::GnuHashTab) + // NB: It also sorts Symbols to meet the GNU hash table requirements. + In<ELFT>::GnuHashTab->addSymbols(Symbols); + else if (Config->EMachine == EM_MIPS) + std::stable_sort(Symbols.begin(), Symbols.end(), + [](const SymbolTableEntry &L, const SymbolTableEntry &R) { + return sortMipsSymbols(L.Symbol, R.Symbol); + }); + size_t I = 0; + for (const SymbolTableEntry &S : Symbols) + S.Symbol->DynsymIndex = ++I; +} + +template <class ELFT> void SymbolTableSection<ELFT>::addSymbol(SymbolBody *B) { + Symbols.push_back({B, StrTabSec.addString(B->getName(), false)}); +} + +template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) { + Buf += sizeof(Elf_Sym); + + // All symbols with STB_LOCAL binding precede the weak and global symbols. + // .dynsym only contains global symbols. + if (Config->Discard != DiscardPolicy::All && !StrTabSec.isDynamic()) + writeLocalSymbols(Buf); + + writeGlobalSymbols(Buf); +} + +template <class ELFT> +void SymbolTableSection<ELFT>::writeLocalSymbols(uint8_t *&Buf) { + // Iterate over all input object files to copy their local symbols + // to the output symbol table pointed by Buf. 
+ for (ObjectFile<ELFT> *File : Symtab<ELFT>::X->getObjectFiles()) { + for (const std::pair<const DefinedRegular<ELFT> *, size_t> &P : + File->KeptLocalSyms) { + const DefinedRegular<ELFT> &Body = *P.first; + InputSectionBase<ELFT> *Section = Body.Section; + auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); + + if (!Section) { + ESym->st_shndx = SHN_ABS; + ESym->st_value = Body.Value; + } else { + const OutputSectionBase *OutSec = Section->OutSec; + ESym->st_shndx = OutSec->SectionIndex; + ESym->st_value = OutSec->Addr + Section->getOffset(Body); + } + ESym->st_name = P.second; + ESym->st_size = Body.template getSize<ELFT>(); + ESym->setBindingAndType(STB_LOCAL, Body.Type); + Buf += sizeof(*ESym); + } + } +} + +template <class ELFT> +void SymbolTableSection<ELFT>::writeGlobalSymbols(uint8_t *Buf) { + // Write the internal symbol table contents to the output symbol table + // pointed by Buf. + auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); + for (const SymbolTableEntry &S : Symbols) { + SymbolBody *Body = S.Symbol; + size_t StrOff = S.StrTabOffset; + + uint8_t Type = Body->Type; + uintX_t Size = Body->getSize<ELFT>(); + + ESym->setBindingAndType(getSymbolBinding(Body), Type); + ESym->st_size = Size; + ESym->st_name = StrOff; + ESym->setVisibility(Body->symbol()->Visibility); + ESym->st_value = Body->getVA<ELFT>(); + + if (const OutputSectionBase *OutSec = getOutputSection(Body)) + ESym->st_shndx = OutSec->SectionIndex; + else if (isa<DefinedRegular<ELFT>>(Body)) + ESym->st_shndx = SHN_ABS; + + if (Config->EMachine == EM_MIPS) { + // On MIPS we need to mark symbol which has a PLT entry and requires + // pointer equality by STO_MIPS_PLT flag. That is necessary to help + // dynamic linker distinguish such symbols and MIPS lazy-binding stubs. 
+ // https://sourceware.org/ml/binutils/2008-07/txt00000.txt + if (Body->isInPlt() && Body->NeedsCopyOrPltAddr) + ESym->st_other |= STO_MIPS_PLT; + if (Config->Relocatable) { + auto *D = dyn_cast<DefinedRegular<ELFT>>(Body); + if (D && D->isMipsPIC()) + ESym->st_other |= STO_MIPS_PIC; + } + } + ++ESym; + } +} + +template <class ELFT> +const OutputSectionBase * +SymbolTableSection<ELFT>::getOutputSection(SymbolBody *Sym) { + switch (Sym->kind()) { + case SymbolBody::DefinedSyntheticKind: + return cast<DefinedSynthetic>(Sym)->Section; + case SymbolBody::DefinedRegularKind: { + auto &D = cast<DefinedRegular<ELFT>>(*Sym); + if (D.Section) + return D.Section->OutSec; + break; + } + case SymbolBody::DefinedCommonKind: + return In<ELFT>::Common->OutSec; + case SymbolBody::SharedKind: + if (cast<SharedSymbol<ELFT>>(Sym)->needsCopy()) + return Out<ELFT>::Bss; + break; + case SymbolBody::UndefinedKind: + case SymbolBody::LazyArchiveKind: + case SymbolBody::LazyObjectKind: + break; + } + return nullptr; +} + +template <class ELFT> +GnuHashTableSection<ELFT>::GnuHashTableSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_HASH, sizeof(uintX_t), + ".gnu.hash") { + this->Entsize = ELFT::Is64Bits ? 0 : 4; +} + +template <class ELFT> +unsigned GnuHashTableSection<ELFT>::calcNBuckets(unsigned NumHashed) { + if (!NumHashed) + return 0; + + // These values are prime numbers which are not greater than 2^(N-1) + 1. + // In result, for any particular NumHashed we return a prime number + // which is not greater than NumHashed. + static const unsigned Primes[] = { + 1, 1, 3, 3, 7, 13, 31, 61, 127, 251, + 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071}; + + return Primes[std::min<unsigned>(Log2_32_Ceil(NumHashed), + array_lengthof(Primes) - 1)]; +} + +// Bloom filter estimation: at least 8 bits for each hashed symbol. +// GNU Hash table requirement: it should be a power of 2, +// the minimum value is 1, even for an empty table. 
+// Expected results for a 32-bit target: +// calcMaskWords(0..4) = 1 +// calcMaskWords(5..8) = 2 +// calcMaskWords(9..16) = 4 +// For a 64-bit target: +// calcMaskWords(0..8) = 1 +// calcMaskWords(9..16) = 2 +// calcMaskWords(17..32) = 4 +template <class ELFT> +unsigned GnuHashTableSection<ELFT>::calcMaskWords(unsigned NumHashed) { + if (!NumHashed) + return 1; + return NextPowerOf2((NumHashed - 1) / sizeof(Elf_Off)); +} + +template <class ELFT> void GnuHashTableSection<ELFT>::finalize() { + unsigned NumHashed = Symbols.size(); + NBuckets = calcNBuckets(NumHashed); + MaskWords = calcMaskWords(NumHashed); + // Second hash shift estimation: just predefined values. + Shift2 = ELFT::Is64Bits ? 6 : 5; + + this->OutSec->Entsize = this->Entsize; + this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; + this->Size = sizeof(Elf_Word) * 4 // Header + + sizeof(Elf_Off) * MaskWords // Bloom Filter + + sizeof(Elf_Word) * NBuckets // Hash Buckets + + sizeof(Elf_Word) * NumHashed; // Hash Values +} + +template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) { + writeHeader(Buf); + if (Symbols.empty()) + return; + writeBloomFilter(Buf); + writeHashTable(Buf); +} + +template <class ELFT> +void GnuHashTableSection<ELFT>::writeHeader(uint8_t *&Buf) { + auto *P = reinterpret_cast<Elf_Word *>(Buf); + *P++ = NBuckets; + *P++ = In<ELFT>::DynSymTab->getNumSymbols() - Symbols.size(); + *P++ = MaskWords; + *P++ = Shift2; + Buf = reinterpret_cast<uint8_t *>(P); +} + +template <class ELFT> +void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *&Buf) { + unsigned C = sizeof(Elf_Off) * 8; + + auto *Masks = reinterpret_cast<Elf_Off *>(Buf); + for (const SymbolData &Sym : Symbols) { + size_t Pos = (Sym.Hash / C) & (MaskWords - 1); + uintX_t V = (uintX_t(1) << (Sym.Hash % C)) | + (uintX_t(1) << ((Sym.Hash >> Shift2) % C)); + Masks[Pos] |= V; + } + Buf += sizeof(Elf_Off) * MaskWords; +} + +template <class ELFT> +void 
GnuHashTableSection<ELFT>::writeHashTable(uint8_t *Buf) { + // Buckets[] receives, for each bucket, the dynsym index of the first symbol + // that hashes into it. Values[] receives one hash per symbol with bit 0 + // cleared, except that bit 0 is set on the last symbol of every bucket. + // Symbols must already be ordered by bucket (see the assert below). + Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf); + Elf_Word *Values = Buckets + NBuckets; + + int PrevBucket = -1; + int I = 0; + for (const SymbolData &Sym : Symbols) { + int Bucket = Sym.Hash % NBuckets; + assert(PrevBucket <= Bucket); + if (Bucket != PrevBucket) { + Buckets[Bucket] = Sym.Body->DynsymIndex; + PrevBucket = Bucket; + if (I > 0) + Values[I - 1] |= 1; + } + Values[I] = Sym.Hash & ~1; + ++I; + } + if (I > 0) + Values[I - 1] |= 1; +} + +// Computes the symbol name hash stored in this section: H starts at 5381 and +// becomes H * 33 + C for every byte C of Name. +static uint32_t hashGnu(StringRef Name) { + uint32_t H = 5381; + for (uint8_t C : Name) + H = (H << 5) + H + C; + return H; +} + +// Add symbols to this symbol hash table. Note that this function +// destructively sorts the given vector, which is needed because +// the GNU-style hash table imposes ordering requirements on the symbols. +template <class ELFT> +void GnuHashTableSection<ELFT>::addSymbols(std::vector<SymbolTableEntry> &V) { + // Ideally this will just be 'auto' but GCC 6.1 is not able + // to deduce it correctly. 
+ std::vector<SymbolTableEntry>::iterator Mid = + std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) { + return S.Symbol->isUndefined(); + }); + if (Mid == V.end()) + return; + for (auto I = Mid, E = V.end(); I != E; ++I) { + SymbolBody *B = I->Symbol; + size_t StrOff = I->StrTabOffset; + Symbols.push_back({B, StrOff, hashGnu(B->getName())}); + } + + unsigned NBuckets = calcNBuckets(Symbols.size()); + std::stable_sort(Symbols.begin(), Symbols.end(), + [&](const SymbolData &L, const SymbolData &R) { + return L.Hash % NBuckets < R.Hash % NBuckets; + }); + + V.erase(Mid, V.end()); + for (const SymbolData &Sym : Symbols) + V.push_back({Sym.Body, Sym.STName}); +} + +template <class ELFT> +HashTableSection<ELFT>::HashTableSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_HASH, sizeof(Elf_Word), ".hash") { + this->Entsize = sizeof(Elf_Word); +} + +template <class ELFT> void HashTableSection<ELFT>::finalize() { + this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; + this->OutSec->Entsize = this->Entsize; + + unsigned NumEntries = 2; // nbucket and nchain. + NumEntries += In<ELFT>::DynSymTab->getNumSymbols(); // The chain entries. + + // Create as many buckets as there are symbols. + // FIXME: This is simplistic. We can try to optimize it, but implementing + // support for SHT_GNU_HASH is probably even more profitable. 
+ NumEntries += In<ELFT>::DynSymTab->getNumSymbols(); + this->Size = NumEntries * sizeof(Elf_Word); +} + +template <class ELFT> void HashTableSection<ELFT>::writeTo(uint8_t *Buf) { + unsigned NumSymbols = In<ELFT>::DynSymTab->getNumSymbols(); + auto *P = reinterpret_cast<Elf_Word *>(Buf); + *P++ = NumSymbols; // nbucket + *P++ = NumSymbols; // nchain + + Elf_Word *Buckets = P; + Elf_Word *Chains = P + NumSymbols; + + for (const SymbolTableEntry &S : In<ELFT>::DynSymTab->getSymbols()) { + SymbolBody *Body = S.Symbol; + StringRef Name = Body->getName(); + unsigned I = Body->DynsymIndex; + uint32_t Hash = hashSysV(Name) % NumSymbols; + Chains[I] = Buckets[Hash]; + Buckets[Hash] = I; + } +} + +template <class ELFT> +PltSection<ELFT>::PltSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, + ".plt") {} + +template <class ELFT> void PltSection<ELFT>::writeTo(uint8_t *Buf) { + // At beginning of PLT, we have code to call the dynamic linker + // to resolve dynsyms at runtime. Write such code. 
+ Target->writePltHeader(Buf); + size_t Off = Target->PltHeaderSize; + + for (auto &I : Entries) { + const SymbolBody *B = I.first; + unsigned RelOff = I.second; + uint64_t Got = B->getGotPltVA<ELFT>(); + uint64_t Plt = this->getVA() + Off; + Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff); + Off += Target->PltEntrySize; + } +} + +template <class ELFT> void PltSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.PltIndex = Entries.size(); + unsigned RelOff = In<ELFT>::RelaPlt->getRelocOffset(); + Entries.push_back(std::make_pair(&Sym, RelOff)); +} + +template <class ELFT> size_t PltSection<ELFT>::getSize() const { + return Target->PltHeaderSize + Entries.size() * Target->PltEntrySize; +} + +template <class ELFT> +IpltSection<ELFT>::IpltSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, + ".plt") {} + +template <class ELFT> void IpltSection<ELFT>::writeTo(uint8_t *Buf) { + // The IRelative relocations do not support lazy binding so no header is + // needed + size_t Off = 0; + for (auto &I : Entries) { + const SymbolBody *B = I.first; + unsigned RelOff = I.second + In<ELFT>::Plt->getSize(); + uint64_t Got = B->getGotPltVA<ELFT>(); + uint64_t Plt = this->getVA() + Off; + Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff); + Off += Target->PltEntrySize; + } +} + +template <class ELFT> void IpltSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.PltIndex = Entries.size(); + Sym.IsInIplt = true; + unsigned RelOff = In<ELFT>::RelaIplt->getRelocOffset(); + Entries.push_back(std::make_pair(&Sym, RelOff)); +} + +template <class ELFT> size_t IpltSection<ELFT>::getSize() const { + return Entries.size() * Target->PltEntrySize; +} + +template <class ELFT> +GdbIndexSection<ELFT>::GdbIndexSection() + : SyntheticSection<ELFT>(0, SHT_PROGBITS, 1, ".gdb_index"), + StringPool(llvm::StringTableBuilder::ELF) {} + +template <class ELFT> void GdbIndexSection<ELFT>::parseDebugSections() { + for (InputSectionBase<ELFT> *S : 
Symtab<ELFT>::X->Sections) + if (InputSection<ELFT> *IS = dyn_cast<InputSection<ELFT>>(S)) + if (IS->OutSec && IS->Name == ".debug_info") + readDwarf(IS); +} + +// Hash function for symbol names as described in the .gdb_index format +// specification. This is the variant used by index versions 5 to 7; version 4 +// uses a different one. +static uint32_t hash(StringRef Str) { + uint32_t R = 0; + for (uint8_t C : Str) + R = R * 67 + tolower(C) - 113; + return R; +} + +// Reads one .debug_info input section through GdbIndexBuilder and merges its +// CU list, address area and public names/types into this section. Every name +// is added to StringPool and SymbolTable; CuVectors collects, per symbol, the +// (CU index, attribute byte) pairs that refer to it. Returns early if +// ErrorCount is set after the builder has been constructed. +template <class ELFT> +void GdbIndexSection<ELFT>::readDwarf(InputSection<ELFT> *I) { + GdbIndexBuilder<ELFT> Builder(I); + if (ErrorCount) + return; + + size_t CuId = CompilationUnits.size(); + std::vector<std::pair<uintX_t, uintX_t>> CuList = Builder.readCUList(); + CompilationUnits.insert(CompilationUnits.end(), CuList.begin(), CuList.end()); + + std::vector<AddressEntry<ELFT>> AddrArea = Builder.readAddressArea(CuId); + AddressArea.insert(AddressArea.end(), AddrArea.begin(), AddrArea.end()); + + std::vector<std::pair<StringRef, uint8_t>> NamesAndTypes = + Builder.readPubNamesAndTypes(); + + for (std::pair<StringRef, uint8_t> &Pair : NamesAndTypes) { + uint32_t Hash = hash(Pair.first); + size_t Offset = StringPool.add(Pair.first); + + bool IsNew; + GdbSymbol *Sym; + std::tie(IsNew, Sym) = SymbolTable.add(Hash, Offset); + if (IsNew) { + Sym->CuVectorIndex = CuVectors.size(); + CuVectors.push_back({{CuId, Pair.second}}); + continue; + } + + std::vector<std::pair<uint32_t, uint8_t>> &CuVec = + CuVectors[Sym->CuVectorIndex]; + CuVec.push_back({CuId, Pair.second}); + } +} + +// Parses the debug sections and computes the offsets of all index areas. +// The work is done once; later calls return immediately. +template <class ELFT> void GdbIndexSection<ELFT>::finalize() { + if (Finalized) + return; + Finalized = true; + + parseDebugSections(); + + // The GdbIndex header consists of a version field + // and 5 more fields with different kinds of offsets. 
+ CuTypesOffset = CuListOffset + CompilationUnits.size() * CompilationUnitSize; + SymTabOffset = CuTypesOffset + AddressArea.size() * AddressEntrySize; + + ConstantPoolOffset = + SymTabOffset + SymbolTable.getCapacity() * SymTabEntrySize; + + for (std::vector<std::pair<uint32_t, uint8_t>> &CuVec : CuVectors) { + CuVectorsOffset.push_back(CuVectorsSize); + CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1); + } + StringPoolOffset = ConstantPoolOffset + CuVectorsSize; + + StringPool.finalizeInOrder(); +} + +template <class ELFT> size_t GdbIndexSection<ELFT>::getSize() const { + const_cast<GdbIndexSection<ELFT> *>(this)->finalize(); + return StringPoolOffset + StringPool.getSize(); +} + +template <class ELFT> void GdbIndexSection<ELFT>::writeTo(uint8_t *Buf) { + write32le(Buf, 7); // Write version. + write32le(Buf + 4, CuListOffset); // CU list offset. + write32le(Buf + 8, CuTypesOffset); // Types CU list offset. + write32le(Buf + 12, CuTypesOffset); // Address area offset. + write32le(Buf + 16, SymTabOffset); // Symbol table offset. + write32le(Buf + 20, ConstantPoolOffset); // Constant pool offset. + Buf += 24; + + // Write the CU list. + for (std::pair<uintX_t, uintX_t> CU : CompilationUnits) { + write64le(Buf, CU.first); + write64le(Buf + 8, CU.second); + Buf += 16; + } + + // Write the address area. + for (AddressEntry<ELFT> &E : AddressArea) { + uintX_t BaseAddr = E.Section->OutSec->Addr + E.Section->getOffset(0); + write64le(Buf, BaseAddr + E.LowAddress); + write64le(Buf + 8, BaseAddr + E.HighAddress); + write32le(Buf + 16, E.CuIndex); + Buf += 20; + } + + // Write the symbol table. 
+ for (size_t I = 0; I < SymbolTable.getCapacity(); ++I) { + GdbSymbol *Sym = SymbolTable.getSymbol(I); + if (Sym) { + size_t NameOffset = + Sym->NameOffset + StringPoolOffset - ConstantPoolOffset; + size_t CuVectorOffset = CuVectorsOffset[Sym->CuVectorIndex]; + write32le(Buf, NameOffset); + write32le(Buf + 4, CuVectorOffset); + } + Buf += 8; + } + + // Write the CU vectors into the constant pool. + for (std::vector<std::pair<uint32_t, uint8_t>> &CuVec : CuVectors) { + write32le(Buf, CuVec.size()); + Buf += 4; + for (std::pair<uint32_t, uint8_t> &P : CuVec) { + uint32_t Index = P.first; + uint8_t Flags = P.second; + Index |= Flags << 24; + write32le(Buf, Index); + Buf += 4; + } + } + + StringPool.write(Buf); +} + +template <class ELFT> bool GdbIndexSection<ELFT>::empty() const { + return !Out<ELFT>::DebugInfo; +} + +template <class ELFT> +EhFrameHeader<ELFT>::EhFrameHeader() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame_hdr") {} + +// .eh_frame_hdr contains a binary search table of pointers to FDEs. +// Each entry of the search table consists of two values, +// the starting PC from where FDEs covers, and the FDE's address. +// It is sorted by PC. +template <class ELFT> void EhFrameHeader<ELFT>::writeTo(uint8_t *Buf) { + const endianness E = ELFT::TargetEndianness; + + // Sort the FDE list by their PC and uniqueify. Usually there is only + // one FDE for a PC (i.e. function), but if ICF merges two functions + // into one, there can be more than one FDEs pointing to the address. 
+ auto Less = [](const FdeData &A, const FdeData &B) { return A.Pc < B.Pc; }; + std::stable_sort(Fdes.begin(), Fdes.end(), Less); + auto Eq = [](const FdeData &A, const FdeData &B) { return A.Pc == B.Pc; }; + Fdes.erase(std::unique(Fdes.begin(), Fdes.end(), Eq), Fdes.end()); + + Buf[0] = 1; + Buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; + Buf[2] = DW_EH_PE_udata4; + Buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; + write32<E>(Buf + 4, Out<ELFT>::EhFrame->Addr - this->getVA() - 4); + write32<E>(Buf + 8, Fdes.size()); + Buf += 12; + + uintX_t VA = this->getVA(); + for (FdeData &Fde : Fdes) { + write32<E>(Buf, Fde.Pc - VA); + write32<E>(Buf + 4, Fde.FdeVA - VA); + Buf += 8; + } +} + +template <class ELFT> size_t EhFrameHeader<ELFT>::getSize() const { + // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. + return 12 + Out<ELFT>::EhFrame->NumFdes * 8; +} + +template <class ELFT> +void EhFrameHeader<ELFT>::addFde(uint32_t Pc, uint32_t FdeVA) { + Fdes.push_back({Pc, FdeVA}); +} + +template <class ELFT> bool EhFrameHeader<ELFT>::empty() const { + return Out<ELFT>::EhFrame->empty(); +} + +template <class ELFT> +VersionDefinitionSection<ELFT>::VersionDefinitionSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t), + ".gnu.version_d") {} + +static StringRef getFileDefName() { + if (!Config->SoName.empty()) + return Config->SoName; + return Config->OutputFile; +} + +template <class ELFT> void VersionDefinitionSection<ELFT>::finalize() { + FileDefNameOff = In<ELFT>::DynStrTab->addString(getFileDefName()); + for (VersionDefinition &V : Config->VersionDefinitions) + V.NameOff = In<ELFT>::DynStrTab->addString(V.Name); + + this->OutSec->Link = this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; + + // sh_info should be set to the number of definitions. 
This fact is missed in + // documentation, but confirmed by binutils community: + // https://sourceware.org/ml/binutils/2014-11/msg00355.html + this->OutSec->Info = this->Info = getVerDefNum(); +} + +template <class ELFT> +void VersionDefinitionSection<ELFT>::writeOne(uint8_t *Buf, uint32_t Index, + StringRef Name, size_t NameOff) { + auto *Verdef = reinterpret_cast<Elf_Verdef *>(Buf); + Verdef->vd_version = 1; + Verdef->vd_cnt = 1; + Verdef->vd_aux = sizeof(Elf_Verdef); + Verdef->vd_next = sizeof(Elf_Verdef) + sizeof(Elf_Verdaux); + Verdef->vd_flags = (Index == 1 ? VER_FLG_BASE : 0); + Verdef->vd_ndx = Index; + Verdef->vd_hash = hashSysV(Name); + + auto *Verdaux = reinterpret_cast<Elf_Verdaux *>(Buf + sizeof(Elf_Verdef)); + Verdaux->vda_name = NameOff; + Verdaux->vda_next = 0; +} + +template <class ELFT> +void VersionDefinitionSection<ELFT>::writeTo(uint8_t *Buf) { + writeOne(Buf, 1, getFileDefName(), FileDefNameOff); + + for (VersionDefinition &V : Config->VersionDefinitions) { + Buf += sizeof(Elf_Verdef) + sizeof(Elf_Verdaux); + writeOne(Buf, V.Id, V.Name, V.NameOff); + } + + // Need to terminate the last version definition. + Elf_Verdef *Verdef = reinterpret_cast<Elf_Verdef *>(Buf); + Verdef->vd_next = 0; +} + +template <class ELFT> size_t VersionDefinitionSection<ELFT>::getSize() const { + return (sizeof(Elf_Verdef) + sizeof(Elf_Verdaux)) * getVerDefNum(); +} + +template <class ELFT> +VersionTableSection<ELFT>::VersionTableSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t), + ".gnu.version") {} + +template <class ELFT> void VersionTableSection<ELFT>::finalize() { + this->OutSec->Entsize = this->Entsize = sizeof(Elf_Versym); + // At the moment of june 2016 GNU docs does not mention that sh_link field + // should be set, but Sun docs do. Also readelf relies on this field. 
+ this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; +} + +template <class ELFT> size_t VersionTableSection<ELFT>::getSize() const { + return sizeof(Elf_Versym) * (In<ELFT>::DynSymTab->getSymbols().size() + 1); +} + +template <class ELFT> void VersionTableSection<ELFT>::writeTo(uint8_t *Buf) { + auto *OutVersym = reinterpret_cast<Elf_Versym *>(Buf) + 1; + for (const SymbolTableEntry &S : In<ELFT>::DynSymTab->getSymbols()) { + OutVersym->vs_index = S.Symbol->symbol()->VersionId; + ++OutVersym; + } +} + +template <class ELFT> bool VersionTableSection<ELFT>::empty() const { + return !In<ELFT>::VerDef && In<ELFT>::VerNeed->empty(); +} + +template <class ELFT> +VersionNeedSection<ELFT>::VersionNeedSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t), + ".gnu.version_r") { + // Identifiers in verneed section start at 2 because 0 and 1 are reserved + // for VER_NDX_LOCAL and VER_NDX_GLOBAL. + // First identifiers are reserved by verdef section if it exist. + NextIndex = getVerDefNum() + 1; +} + +template <class ELFT> +void VersionNeedSection<ELFT>::addSymbol(SharedSymbol<ELFT> *SS) { + if (!SS->Verdef) { + SS->symbol()->VersionId = VER_NDX_GLOBAL; + return; + } + SharedFile<ELFT> *F = SS->file(); + // If we don't already know that we need an Elf_Verneed for this DSO, prepare + // to create one by adding it to our needed list and creating a dynstr entry + // for the soname. + if (F->VerdefMap.empty()) + Needed.push_back({F, In<ELFT>::DynStrTab->addString(F->getSoName())}); + typename SharedFile<ELFT>::NeededVer &NV = F->VerdefMap[SS->Verdef]; + // If we don't already know that we need an Elf_Vernaux for this Elf_Verdef, + // prepare to create one by allocating a version identifier and creating a + // dynstr entry for the version name. 
+ if (NV.Index == 0) { + NV.StrTab = In<ELFT>::DynStrTab->addString( + SS->file()->getStringTable().data() + SS->Verdef->getAux()->vda_name); + NV.Index = NextIndex++; + } + SS->symbol()->VersionId = NV.Index; +} + +template <class ELFT> void VersionNeedSection<ELFT>::writeTo(uint8_t *Buf) { + // The Elf_Verneeds need to appear first, followed by the Elf_Vernauxs. + auto *Verneed = reinterpret_cast<Elf_Verneed *>(Buf); + auto *Vernaux = reinterpret_cast<Elf_Vernaux *>(Verneed + Needed.size()); + + for (std::pair<SharedFile<ELFT> *, size_t> &P : Needed) { + // Create an Elf_Verneed for this DSO. + Verneed->vn_version = 1; + Verneed->vn_cnt = P.first->VerdefMap.size(); + Verneed->vn_file = P.second; + Verneed->vn_aux = + reinterpret_cast<char *>(Vernaux) - reinterpret_cast<char *>(Verneed); + Verneed->vn_next = sizeof(Elf_Verneed); + ++Verneed; + + // Create the Elf_Vernauxs for this Elf_Verneed. The loop iterates over + // VerdefMap, which will only contain references to needed version + // definitions. Each Elf_Vernaux is based on the information contained in + // the Elf_Verdef in the source DSO. This loop iterates over a std::map of + // pointers, but is deterministic because the pointers refer to Elf_Verdef + // data structures within a single input file. 
+ for (auto &NV : P.first->VerdefMap) { + Vernaux->vna_hash = NV.first->vd_hash; + Vernaux->vna_flags = 0; + Vernaux->vna_other = NV.second.Index; + Vernaux->vna_name = NV.second.StrTab; + Vernaux->vna_next = sizeof(Elf_Vernaux); + ++Vernaux; + } + + Vernaux[-1].vna_next = 0; + } + Verneed[-1].vn_next = 0; +} + +template <class ELFT> void VersionNeedSection<ELFT>::finalize() { + this->OutSec->Link = this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; + this->OutSec->Info = this->Info = Needed.size(); +} + +template <class ELFT> size_t VersionNeedSection<ELFT>::getSize() const { + unsigned Size = Needed.size() * sizeof(Elf_Verneed); + for (const std::pair<SharedFile<ELFT> *, size_t> &P : Needed) + Size += P.first->VerdefMap.size() * sizeof(Elf_Vernaux); + return Size; +} + +template <class ELFT> bool VersionNeedSection<ELFT>::empty() const { + return getNeedNum() == 0; +} + +template <class ELFT> +MipsRldMapSection<ELFT>::MipsRldMapSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + sizeof(typename ELFT::uint), ".rld_map") {} + +template <class ELFT> void MipsRldMapSection<ELFT>::writeTo(uint8_t *Buf) { + // Apply filler from linker script. + uint64_t Filler = Script<ELFT>::X->getFiller(this->Name); + Filler = (Filler << 32) | Filler; + memcpy(Buf, &Filler, getSize()); +} + +template <class ELFT> +ARMExidxSentinelSection<ELFT>::ARMExidxSentinelSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX, + sizeof(typename ELFT::uint), ".ARM.exidx") {} + +// Write a terminating sentinel entry to the end of the .ARM.exidx table. +// This section will have been sorted last in the .ARM.exidx table. 
+// This table entry will have the form:
+// | PREL31 upper bound of code that has exception tables | EXIDX_CANTUNWIND |
+template <class ELFT>
+void ARMExidxSentinelSection<ELFT>::writeTo(uint8_t *Buf) {
+  // Get the InputSection before us, we are by definition last
+  auto RI = cast<OutputSection<ELFT>>(this->OutSec)->Sections.rbegin(); // reverse iterator; its first element is taken to be this sentinel
+  InputSection<ELFT> *LE = *(++RI); // the section just before the sentinel. NOTE(review): assumes the output section holds at least one section besides the sentinel
+  InputSection<ELFT> *LC = cast<InputSection<ELFT>>(LE->getLinkOrderDep()); // LE's link-order dependency; presumably the code section LE describes - confirm
+  uint64_t S = LC->OutSec->Addr + LC->getOffset(LC->getSize()); // address of the end of LC: output section address plus the offset of LC's size
+  uint64_t P = this->getVA(); // address of this sentinel entry
+  Target->relocateOne(Buf, R_ARM_PREL31, S - P); // first word: S relative to P, applied as an R_ARM_PREL31 relocation
+  write32le(Buf + 4, 0x1); // second word: 0x1, the EXIDX_CANTUNWIND field of the layout shown above
+}
+// Explicit instantiations follow, one per ELFT type (ELF32LE, ELF32BE, ELF64LE, ELF64BE).
+template InputSection<ELF32LE> *elf::createCommonSection();
+template InputSection<ELF32BE> *elf::createCommonSection();
+template InputSection<ELF64LE> *elf::createCommonSection();
+template InputSection<ELF64BE> *elf::createCommonSection();
+
+template InputSection<ELF32LE> *elf::createInterpSection();
+template InputSection<ELF32BE> *elf::createInterpSection();
+template InputSection<ELF64LE> *elf::createInterpSection();
+template InputSection<ELF64BE> *elf::createInterpSection();
+
+template MergeInputSection<ELF32LE> *elf::createCommentSection();
+template MergeInputSection<ELF32BE> *elf::createCommentSection();
+template MergeInputSection<ELF64LE> *elf::createCommentSection();
+template MergeInputSection<ELF64BE> *elf::createCommentSection();
+
+template class elf::MipsAbiFlagsSection<ELF32LE>;
+template class elf::MipsAbiFlagsSection<ELF32BE>;
+template class elf::MipsAbiFlagsSection<ELF64LE>;
+template class elf::MipsAbiFlagsSection<ELF64BE>;
+
+template class elf::MipsOptionsSection<ELF32LE>;
+template class elf::MipsOptionsSection<ELF32BE>;
+template class elf::MipsOptionsSection<ELF64LE>;
+template class elf::MipsOptionsSection<ELF64BE>;
+
+template class elf::MipsReginfoSection<ELF32LE>;
+template class elf::MipsReginfoSection<ELF32BE>;
+template class elf::MipsReginfoSection<ELF64LE>;
+template class elf::MipsReginfoSection<ELF64BE>;
+
+template class elf::BuildIdSection<ELF32LE>; // explicit instantiations of the synthetic section classes, one per ELFT type (ELF32LE, ELF32BE, ELF64LE, ELF64BE)
+template class elf::BuildIdSection<ELF32BE>;
+template class elf::BuildIdSection<ELF64LE>;
+template class elf::BuildIdSection<ELF64BE>;
+
+template class elf::GotSection<ELF32LE>;
+template class elf::GotSection<ELF32BE>;
+template class elf::GotSection<ELF64LE>;
+template class elf::GotSection<ELF64BE>;
+
+template class elf::MipsGotSection<ELF32LE>;
+template class elf::MipsGotSection<ELF32BE>;
+template class elf::MipsGotSection<ELF64LE>;
+template class elf::MipsGotSection<ELF64BE>;
+
+template class elf::GotPltSection<ELF32LE>;
+template class elf::GotPltSection<ELF32BE>;
+template class elf::GotPltSection<ELF64LE>;
+template class elf::GotPltSection<ELF64BE>;
+
+template class elf::IgotPltSection<ELF32LE>;
+template class elf::IgotPltSection<ELF32BE>;
+template class elf::IgotPltSection<ELF64LE>;
+template class elf::IgotPltSection<ELF64BE>;
+
+template class elf::StringTableSection<ELF32LE>;
+template class elf::StringTableSection<ELF32BE>;
+template class elf::StringTableSection<ELF64LE>;
+template class elf::StringTableSection<ELF64BE>;
+
+template class elf::DynamicSection<ELF32LE>;
+template class elf::DynamicSection<ELF32BE>;
+template class elf::DynamicSection<ELF64LE>;
+template class elf::DynamicSection<ELF64BE>;
+
+template class elf::RelocationSection<ELF32LE>;
+template class elf::RelocationSection<ELF32BE>;
+template class elf::RelocationSection<ELF64LE>;
+template class elf::RelocationSection<ELF64BE>;
+
+template class elf::SymbolTableSection<ELF32LE>;
+template class elf::SymbolTableSection<ELF32BE>;
+template class elf::SymbolTableSection<ELF64LE>;
+template class elf::SymbolTableSection<ELF64BE>;
+
+template class elf::GnuHashTableSection<ELF32LE>;
+template class elf::GnuHashTableSection<ELF32BE>;
+template class elf::GnuHashTableSection<ELF64LE>;
+template class elf::GnuHashTableSection<ELF64BE>;
+
+template class elf::HashTableSection<ELF32LE>;
+template class elf::HashTableSection<ELF32BE>;
+template class elf::HashTableSection<ELF64LE>;
+template class elf::HashTableSection<ELF64BE>;
+
+template class elf::PltSection<ELF32LE>;
+template class elf::PltSection<ELF32BE>;
+template class elf::PltSection<ELF64LE>;
+template class elf::PltSection<ELF64BE>;
+
+template class elf::IpltSection<ELF32LE>;
+template class elf::IpltSection<ELF32BE>;
+template class elf::IpltSection<ELF64LE>;
+template class elf::IpltSection<ELF64BE>;
+
+template class elf::GdbIndexSection<ELF32LE>;
+template class elf::GdbIndexSection<ELF32BE>;
+template class elf::GdbIndexSection<ELF64LE>;
+template class elf::GdbIndexSection<ELF64BE>;
+
+template class elf::EhFrameHeader<ELF32LE>;
+template class elf::EhFrameHeader<ELF32BE>;
+template class elf::EhFrameHeader<ELF64LE>;
+template class elf::EhFrameHeader<ELF64BE>;
+
+template class elf::VersionTableSection<ELF32LE>;
+template class elf::VersionTableSection<ELF32BE>;
+template class elf::VersionTableSection<ELF64LE>;
+template class elf::VersionTableSection<ELF64BE>;
+
+template class elf::VersionNeedSection<ELF32LE>;
+template class elf::VersionNeedSection<ELF32BE>;
+template class elf::VersionNeedSection<ELF64LE>;
+template class elf::VersionNeedSection<ELF64BE>;
+
+template class elf::VersionDefinitionSection<ELF32LE>;
+template class elf::VersionDefinitionSection<ELF32BE>;
+template class elf::VersionDefinitionSection<ELF64LE>;
+template class elf::VersionDefinitionSection<ELF64BE>;
+
+template class elf::MipsRldMapSection<ELF32LE>;
+template class elf::MipsRldMapSection<ELF32BE>;
+template class elf::MipsRldMapSection<ELF64LE>;
+template class elf::MipsRldMapSection<ELF64BE>;
+
+template class elf::ARMExidxSentinelSection<ELF32LE>;
+template class elf::ARMExidxSentinelSection<ELF32BE>;
+template class elf::ARMExidxSentinelSection<ELF64LE>;
+template class elf::ARMExidxSentinelSection<ELF64BE>;
diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.h b/contrib/llvm/tools/lld/ELF/SyntheticSections.h new file mode 100644
index 000000000000..dfefb3821e75 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.h @@ -0,0 +1,747 @@ +//===- SyntheticSection.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SYNTHETIC_SECTION_H +#define LLD_ELF_SYNTHETIC_SECTION_H + +#include "GdbIndex.h" +#include "InputSection.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/MC/StringTableBuilder.h" + +namespace lld { +namespace elf { + +template <class ELFT> class SyntheticSection : public InputSection<ELFT> { + typedef typename ELFT::uint uintX_t; + +public: + SyntheticSection(uintX_t Flags, uint32_t Type, uintX_t Addralign, + StringRef Name) + : InputSection<ELFT>(Flags, Type, Addralign, ArrayRef<uint8_t>(), Name, + InputSectionData::Synthetic) { + this->Live = true; + } + + virtual ~SyntheticSection() = default; + virtual void writeTo(uint8_t *Buf) = 0; + virtual size_t getSize() const = 0; + virtual void finalize() {} + virtual bool empty() const { return false; } + + uintX_t getVA() const { + return this->OutSec ? 
this->OutSec->Addr + this->OutSecOff : 0; + } + + static bool classof(const InputSectionData *D) { + return D->kind() == InputSectionData::Synthetic; + } +}; + +template <class ELFT> class GotSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::uint uintX_t; + +public: + GotSection(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return Size; } + void finalize() override; + bool empty() const override; + + void addEntry(SymbolBody &Sym); + bool addDynTlsEntry(SymbolBody &Sym); + bool addTlsIndex(); + uintX_t getGlobalDynAddr(const SymbolBody &B) const; + uintX_t getGlobalDynOffset(const SymbolBody &B) const; + + uintX_t getTlsIndexVA() { return this->getVA() + TlsIndexOff; } + uint32_t getTlsIndexOff() const { return TlsIndexOff; } + + // Flag to force GOT to be in output if we have relocations + // that relies on its address. + bool HasGotOffRel = false; + +private: + size_t NumEntries = 0; + uint32_t TlsIndexOff = -1; + uintX_t Size = 0; +}; + +// .note.gnu.build-id section. +template <class ELFT> class BuildIdSection : public SyntheticSection<ELFT> { + // First 16 bytes are a header. 
+ static const unsigned HeaderSize = 16; + +public: + BuildIdSection(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return HeaderSize + HashSize; } + void writeBuildId(llvm::ArrayRef<uint8_t> Buf); + +private: + void computeHash(llvm::ArrayRef<uint8_t> Buf, + std::function<void(uint8_t *, ArrayRef<uint8_t>)> Hash); + + size_t HashSize; + uint8_t *HashBuf; +}; + +template <class ELFT> +class MipsGotSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::uint uintX_t; + +public: + MipsGotSection(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return Size; } + void finalize() override; + bool empty() const override; + void addEntry(SymbolBody &Sym, uintX_t Addend, RelExpr Expr); + bool addDynTlsEntry(SymbolBody &Sym); + bool addTlsIndex(); + uintX_t getPageEntryOffset(const SymbolBody &B, uintX_t Addend) const; + uintX_t getBodyEntryOffset(const SymbolBody &B, uintX_t Addend) const; + uintX_t getGlobalDynOffset(const SymbolBody &B) const; + + // Returns the symbol which corresponds to the first entry of the global part + // of GOT on MIPS platform. It is required to fill up MIPS-specific dynamic + // table properties. + // Returns nullptr if the global part is empty. + const SymbolBody *getFirstGlobalEntry() const; + + // Returns the number of entries in the local part of GOT including + // the number of reserved entries. + unsigned getLocalEntriesNum() const; + + // Returns offset of TLS part of the MIPS GOT table. This part goes + // after 'local' and 'global' entries. + uintX_t getTlsOffset() const; + + uint32_t getTlsIndexOff() const { return TlsIndexOff; } + + uintX_t getGp() const; + +private: + // MIPS GOT consists of three parts: local, global and tls. Each part + // contains different types of entries. 
Here is a layout of GOT: + // - Header entries | + // - Page entries | Local part + // - Local entries (16-bit access) | + // - Local entries (32-bit access) | + // - Normal global entries || Global part + // - Reloc-only global entries || + // - TLS entries ||| TLS part + // + // Header: + // Two entries hold predefined value 0x0 and 0x80000000. + // Page entries: + // These entries created by R_MIPS_GOT_PAGE relocation and R_MIPS_GOT16 + // relocation against local symbols. They are initialized by higher 16-bit + // of the corresponding symbol's value. So each 64kb of address space + // requires a single GOT entry. + // Local entries (16-bit access): + // These entries created by GOT relocations against global non-preemptible + // symbols so dynamic linker is not necessary to resolve the symbol's + // values. "16-bit access" means that corresponding relocations address + // GOT using 16-bit index. Each unique Symbol-Addend pair has its own + // GOT entry. + // Local entries (32-bit access): + // These entries are the same as above but created by relocations which + // address GOT using 32-bit index (R_MIPS_GOT_HI16/LO16 etc). + // Normal global entries: + // These entries created by GOT relocations against preemptible global + // symbols. They need to be initialized by dynamic linker and they ordered + // exactly as the corresponding entries in the dynamic symbols table. + // Reloc-only global entries: + // These entries created for symbols that are referenced by dynamic + // relocations R_MIPS_REL32. These entries are not accessed with gp-relative + // addressing, but MIPS ABI requires that these entries be present in GOT. + // TLS entries: + // Entries created by TLS relocations. + + // Number of "Header" entries. + static const unsigned HeaderEntriesNum = 2; + // Number of allocated "Page" entries. + uint32_t PageEntriesNum = 0; + // Map output sections referenced by MIPS GOT relocations + // to the first index of "Page" entries allocated for this section. 
+ llvm::SmallMapVector<const OutputSectionBase *, size_t, 16> PageIndexMap; + + typedef std::pair<const SymbolBody *, uintX_t> GotEntry; + typedef std::vector<GotEntry> GotEntries; + // Map from Symbol-Addend pair to the GOT index. + llvm::DenseMap<GotEntry, size_t> EntryIndexMap; + // Local entries (16-bit access). + GotEntries LocalEntries; + // Local entries (32-bit access). + GotEntries LocalEntries32; + + // Normal and reloc-only global entries. + GotEntries GlobalEntries; + + // TLS entries. + std::vector<const SymbolBody *> TlsEntries; + + uint32_t TlsIndexOff = -1; + uintX_t Size = 0; +}; + +template <class ELFT> +class GotPltSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::uint uintX_t; + +public: + GotPltSection(); + void addEntry(SymbolBody &Sym); + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; + bool empty() const override { return Entries.empty(); } + +private: + std::vector<const SymbolBody *> Entries; +}; + +// The IgotPltSection is a Got associated with the IpltSection for GNU Ifunc +// Symbols that will be relocated by Target->IRelativeRel. +// On most Targets the IgotPltSection will immediately follow the GotPltSection +// on ARM the IgotPltSection will immediately follow the GotSection. 
+template <class ELFT> +class IgotPltSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::uint uintX_t; + +public: + IgotPltSection(); + void addEntry(SymbolBody &Sym); + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; + bool empty() const override { return Entries.empty(); } + +private: + std::vector<const SymbolBody *> Entries; +}; + +template <class ELFT> +class StringTableSection final : public SyntheticSection<ELFT> { +public: + typedef typename ELFT::uint uintX_t; + StringTableSection(StringRef Name, bool Dynamic); + unsigned addString(StringRef S, bool HashIt = true); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return Size; } + bool isDynamic() const { return Dynamic; } + +private: + const bool Dynamic; + + // ELF string tables start with a NUL byte, so 1. + uintX_t Size = 1; + + llvm::DenseMap<StringRef, unsigned> StringMap; + std::vector<StringRef> Strings; +}; + +template <class ELFT> class DynamicReloc { + typedef typename ELFT::uint uintX_t; + +public: + DynamicReloc(uint32_t Type, const InputSectionBase<ELFT> *InputSec, + uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym, + uintX_t Addend) + : Type(Type), Sym(Sym), InputSec(InputSec), OffsetInSec(OffsetInSec), + UseSymVA(UseSymVA), Addend(Addend) {} + + DynamicReloc(uint32_t Type, const OutputSectionBase *OutputSec, + uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym, + uintX_t Addend) + : Type(Type), Sym(Sym), OutputSec(OutputSec), OffsetInSec(OffsetInSec), + UseSymVA(UseSymVA), Addend(Addend) {} + + uintX_t getOffset() const; + uintX_t getAddend() const; + uint32_t getSymIndex() const; + const OutputSectionBase *getOutputSec() const { return OutputSec; } + const InputSectionBase<ELFT> *getInputSec() const { return InputSec; } + + uint32_t Type; + +private: + SymbolBody *Sym; + const InputSectionBase<ELFT> *InputSec = nullptr; + const OutputSectionBase *OutputSec = nullptr; + uintX_t OffsetInSec; + bool UseSymVA; + uintX_t 
Addend; +}; + +template <class ELFT> +class DynamicSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Dyn Elf_Dyn; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::uint uintX_t; + + // The .dynamic section contains information for the dynamic linker. + // The section consists of fixed size entries, which consist of + // type and value fields. Value are one of plain integers, symbol + // addresses, or section addresses. This struct represents the entry. + struct Entry { + int32_t Tag; + union { + OutputSectionBase *OutSec; + InputSection<ELFT> *InSec; + uint64_t Val; + const SymbolBody *Sym; + }; + enum KindT { SecAddr, SecSize, SymAddr, PlainInt, InSecAddr } Kind; + Entry(int32_t Tag, OutputSectionBase *OutSec, KindT Kind = SecAddr) + : Tag(Tag), OutSec(OutSec), Kind(Kind) {} + Entry(int32_t Tag, InputSection<ELFT> *Sec) + : Tag(Tag), InSec(Sec), Kind(InSecAddr) {} + Entry(int32_t Tag, uint64_t Val) : Tag(Tag), Val(Val), Kind(PlainInt) {} + Entry(int32_t Tag, const SymbolBody *Sym) + : Tag(Tag), Sym(Sym), Kind(SymAddr) {} + }; + + // finalize() fills this vector with the section contents. finalize() + // cannot directly create final section contents because when the + // function is called, symbol or section addresses are not fixed yet. 
+ std::vector<Entry> Entries; + +public: + DynamicSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return Size; } + +private: + void addEntries(); + void add(Entry E) { Entries.push_back(E); } + uintX_t Size = 0; +}; + +template <class ELFT> +class RelocationSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::uint uintX_t; + +public: + RelocationSection(StringRef Name, bool Sort); + void addReloc(const DynamicReloc<ELFT> &Reloc); + unsigned getRelocOffset(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + bool empty() const override { return Relocs.empty(); } + size_t getSize() const override { return Relocs.size() * this->Entsize; } + size_t getRelativeRelocCount() const { return NumRelativeRelocs; } + +private: + bool Sort; + size_t NumRelativeRelocs = 0; + std::vector<DynamicReloc<ELFT>> Relocs; +}; + +struct SymbolTableEntry { + SymbolBody *Symbol; + size_t StrTabOffset; +}; + +template <class ELFT> +class SymbolTableSection final : public SyntheticSection<ELFT> { +public: + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::SymRange Elf_Sym_Range; + typedef typename ELFT::uint uintX_t; + SymbolTableSection(StringTableSection<ELFT> &StrTabSec); + + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return getNumSymbols() * sizeof(Elf_Sym); } + void addSymbol(SymbolBody *Body); + StringTableSection<ELFT> &getStrTabSec() const { return StrTabSec; } + unsigned getNumSymbols() const { return NumLocals + Symbols.size() + 1; } + + ArrayRef<SymbolTableEntry> getSymbols() const { return Symbols; } + + unsigned NumLocals = 0; + StringTableSection<ELFT> &StrTabSec; + +private: + void writeLocalSymbols(uint8_t *&Buf); + void writeGlobalSymbols(uint8_t *Buf); + + const OutputSectionBase 
*getOutputSection(SymbolBody *Sym); + + // A vector of symbols and their string table offsets. + std::vector<SymbolTableEntry> Symbols; +}; + +// Outputs GNU Hash section. For detailed explanation see: +// https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections +template <class ELFT> +class GnuHashTableSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Off Elf_Off; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::uint uintX_t; + +public: + GnuHashTableSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return this->Size; } + + // Adds symbols to the hash table. + // Sorts the input to satisfy GNU hash section requirements. + void addSymbols(std::vector<SymbolTableEntry> &Symbols); + +private: + static unsigned calcNBuckets(unsigned NumHashed); + static unsigned calcMaskWords(unsigned NumHashed); + + void writeHeader(uint8_t *&Buf); + void writeBloomFilter(uint8_t *&Buf); + void writeHashTable(uint8_t *Buf); + + struct SymbolData { + SymbolBody *Body; + size_t STName; + uint32_t Hash; + }; + + std::vector<SymbolData> Symbols; + + unsigned MaskWords; + unsigned NBuckets; + unsigned Shift2; + uintX_t Size = 0; +}; + +template <class ELFT> +class HashTableSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Word Elf_Word; + +public: + HashTableSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return this->Size; } + +private: + size_t Size = 0; +}; + +template <class ELFT> class PltSection final : public SyntheticSection<ELFT> { +public: + PltSection(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; + void addEntry(SymbolBody &Sym); + bool empty() const override { return Entries.empty(); } + +private: + std::vector<std::pair<const SymbolBody *, unsigned>> Entries; +}; + +// The IpltSection is a variant of Plt for recording entries for GNU Ifunc +// symbols that 
will be subject to a Target->IRelativeRel +// The IpltSection immediately follows the Plt section in the Output Section +template <class ELFT> class IpltSection final : public SyntheticSection<ELFT> { +public: + IpltSection(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; + void addEntry(SymbolBody &Sym); + bool empty() const override { return Entries.empty(); } + +private: + std::vector<std::pair<const SymbolBody *, unsigned>> Entries; +}; + +template <class ELFT> +class GdbIndexSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::uint uintX_t; + + const unsigned OffsetTypeSize = 4; + const unsigned CuListOffset = 6 * OffsetTypeSize; + const unsigned CompilationUnitSize = 16; + const unsigned AddressEntrySize = 16 + OffsetTypeSize; + const unsigned SymTabEntrySize = 2 * OffsetTypeSize; + +public: + GdbIndexSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; + bool empty() const override; + + // Pairs of [CU Offset, CU length]. + std::vector<std::pair<uintX_t, uintX_t>> CompilationUnits; + + llvm::StringTableBuilder StringPool; + + GdbHashTab SymbolTable; + + // The CU vector portion of the constant pool. + std::vector<std::vector<std::pair<uint32_t, uint8_t>>> CuVectors; + + std::vector<AddressEntry<ELFT>> AddressArea; + +private: + void parseDebugSections(); + void readDwarf(InputSection<ELFT> *I); + + uint32_t CuTypesOffset; + uint32_t SymTabOffset; + uint32_t ConstantPoolOffset; + uint32_t StringPoolOffset; + + size_t CuVectorsSize = 0; + std::vector<size_t> CuVectorsOffset; + + bool Finalized = false; +}; + +// --eh-frame-hdr option tells linker to construct a header for all the +// .eh_frame sections. This header is placed to a section named .eh_frame_hdr +// and also to a PT_GNU_EH_FRAME segment. +// At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by +// calling dl_iterate_phdr. 
// This section contains a lookup table for quick binary search of FDEs.
// Detailed info about internals can be found in Ian Lance Taylor's blog:
// http://www.airs.com/blog/archives/460 (".eh_frame")
// http://www.airs.com/blog/archives/462 (".eh_frame_hdr")
template <class ELFT>
class EhFrameHeader final : public SyntheticSection<ELFT> {
  typedef typename ELFT::uint uintX_t;

public:
  EhFrameHeader();
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override;
  // Takes a (Pc, FdeVA) pair for the lookup table.
  // NOTE(review): presumably appends one FdeData to Fdes; the definition is
  // not in this header -- confirm in the .cpp.
  void addFde(uint32_t Pc, uint32_t FdeVA);
  bool empty() const override;

private:
  // One lookup-table record. Both fields are 32-bit regardless of ELFT.
  struct FdeData {
    uint32_t Pc;
    uint32_t FdeVA;
  };

  std::vector<FdeData> Fdes;
};

// For more information about .gnu.version and .gnu.version_r see:
// https://www.akkadia.org/drepper/symbol-versioning

// The .gnu.version_d section which has a section type of SHT_GNU_verdef shall
// contain symbol version definitions. The number of entries in this section
// shall be contained in the DT_VERDEFNUM entry of the .dynamic section.
// The section shall contain an array of Elf_Verdef structures, optionally
// followed by an array of Elf_Verdaux structures.
template <class ELFT>
class VersionDefinitionSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Verdef Elf_Verdef;
  typedef typename ELFT::Verdaux Elf_Verdaux;

public:
  VersionDefinitionSection();
  void finalize() override;
  size_t getSize() const override;
  void writeTo(uint8_t *Buf) override;

private:
  // Helper that emits a single version definition into Buf.
  // NOTE(review): NameOff is presumably a string table offset of Name --
  // confirm against the definition.
  void writeOne(uint8_t *Buf, uint32_t Index, StringRef Name, size_t NameOff);

  // NOTE(review): by its name, the string table offset of the file-level
  // definition's name; it has no in-class initializer, so it must be assigned
  // before use (presumably in finalize()).
  unsigned FileDefNameOff;
};

// The .gnu.version section specifies the required version of each symbol in the
// dynamic symbol table. It contains one Elf_Versym for each dynamic symbol
// table entry. An Elf_Versym is just a 16-bit integer that refers to a version
// identifier defined in either the .gnu.version_r or the .gnu.version_d
// section. The values 0 and 1 are reserved. All other values are used for
// versions in the own object or in any of the dependencies.
template <class ELFT>
class VersionTableSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Versym Elf_Versym;

public:
  VersionTableSection();
  void finalize() override;
  size_t getSize() const override;
  void writeTo(uint8_t *Buf) override;
  bool empty() const override;
};

// The .gnu.version_r section defines the version identifiers used by
// .gnu.version. It contains a linked list of Elf_Verneed data structures. Each
// Elf_Verneed specifies the version requirements for a single DSO, and contains
// a reference to a linked list of Elf_Vernaux data structures which define the
// mapping from version identifiers to version names.
template <class ELFT>
class VersionNeedSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Verneed Elf_Verneed;
  typedef typename ELFT::Vernaux Elf_Vernaux;

  // A vector of shared files that need Elf_Verneed data structures and the
  // string table offsets of their sonames.
  std::vector<std::pair<SharedFile<ELFT> *, size_t>> Needed;

  // The next available version identifier.
  unsigned NextIndex;

public:
  VersionNeedSection();
  void addSymbol(SharedSymbol<ELFT> *SS);
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override;
  // Number of shared files currently recorded in Needed.
  size_t getNeedNum() const { return Needed.size(); }
  bool empty() const override;
};

// .MIPS.abiflags section.
template <class ELFT>
class MipsAbiFlagsSection final : public SyntheticSection<ELFT> {
  typedef llvm::object::Elf_Mips_ABIFlags<ELFT> Elf_Mips_ABIFlags;

public:
  // Factory; defined out of line.
  static MipsAbiFlagsSection *create();

  MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags);
  // The section is exactly one Elf_Mips_ABIFlags record.
  size_t getSize() const override { return sizeof(Elf_Mips_ABIFlags); }
  void writeTo(uint8_t *Buf) override;

private:
  Elf_Mips_ABIFlags Flags;
};

// .MIPS.options section.
template <class ELFT>
class MipsOptionsSection final : public SyntheticSection<ELFT> {
  typedef llvm::object::Elf_Mips_Options<ELFT> Elf_Mips_Options;
  typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo;

public:
  // Factory; defined out of line.
  static MipsOptionsSection *create();

  MipsOptionsSection(Elf_Mips_RegInfo Reginfo);
  void writeTo(uint8_t *Buf) override;

  // Fixed size: one Elf_Mips_Options plus one Elf_Mips_RegInfo.
  size_t getSize() const override {
    return sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo);
  }

private:
  Elf_Mips_RegInfo Reginfo;
};

// MIPS .reginfo section.
template <class ELFT>
class MipsReginfoSection final : public SyntheticSection<ELFT> {
  typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo;

public:
  // Factory; defined out of line.
  static MipsReginfoSection *create();

  MipsReginfoSection(Elf_Mips_RegInfo Reginfo);
  // The section is exactly one Elf_Mips_RegInfo record.
  size_t getSize() const override { return sizeof(Elf_Mips_RegInfo); }
  void writeTo(uint8_t *Buf) override;

private:
  Elf_Mips_RegInfo Reginfo;
};

// This is a MIPS specific section to hold a space within the data segment
// of executable file which is pointed to by the DT_MIPS_RLD_MAP entry.
// See "Dynamic section" in Chapter 5 in the following document:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
template <class ELFT> class MipsRldMapSection : public SyntheticSection<ELFT> {
public:
  MipsRldMapSection();
  // One target-sized word (ELFT::uint).
  size_t getSize() const override { return sizeof(typename ELFT::uint); }
  void writeTo(uint8_t *Buf) override;
};

// NOTE(review): by its name this is the sentinel entry for the ARM exception
// index table; its contents are produced by writeTo(), which is not visible
// here.
template <class ELFT> class ARMExidxSentinelSection : public SyntheticSection<ELFT> {
public:
  ARMExidxSentinelSection();
  // Fixed 8-byte section.
  size_t getSize() const override { return 8; }
  void writeTo(uint8_t *Buf) override;
};

// Factories for linker-created input sections; defined out of line.
template <class ELFT> InputSection<ELFT> *createCommonSection();
template <class ELFT> InputSection<ELFT> *createInterpSection();
template <class ELFT> MergeInputSection<ELFT> *createCommentSection();

// Linker generated sections which can be used as inputs.
+template <class ELFT> struct In { + static InputSection<ELFT> *ARMAttributes; + static BuildIdSection<ELFT> *BuildId; + static InputSection<ELFT> *Common; + static DynamicSection<ELFT> *Dynamic; + static StringTableSection<ELFT> *DynStrTab; + static SymbolTableSection<ELFT> *DynSymTab; + static EhFrameHeader<ELFT> *EhFrameHdr; + static GnuHashTableSection<ELFT> *GnuHashTab; + static GdbIndexSection<ELFT> *GdbIndex; + static GotSection<ELFT> *Got; + static MipsGotSection<ELFT> *MipsGot; + static GotPltSection<ELFT> *GotPlt; + static IgotPltSection<ELFT> *IgotPlt; + static HashTableSection<ELFT> *HashTab; + static InputSection<ELFT> *Interp; + static MipsRldMapSection<ELFT> *MipsRldMap; + static PltSection<ELFT> *Plt; + static IpltSection<ELFT> *Iplt; + static RelocationSection<ELFT> *RelaDyn; + static RelocationSection<ELFT> *RelaPlt; + static RelocationSection<ELFT> *RelaIplt; + static StringTableSection<ELFT> *ShStrTab; + static StringTableSection<ELFT> *StrTab; + static SymbolTableSection<ELFT> *SymTab; + static VersionDefinitionSection<ELFT> *VerDef; + static VersionTableSection<ELFT> *VerSym; + static VersionNeedSection<ELFT> *VerNeed; +}; + +template <class ELFT> InputSection<ELFT> *In<ELFT>::ARMAttributes; +template <class ELFT> BuildIdSection<ELFT> *In<ELFT>::BuildId; +template <class ELFT> InputSection<ELFT> *In<ELFT>::Common; +template <class ELFT> DynamicSection<ELFT> *In<ELFT>::Dynamic; +template <class ELFT> StringTableSection<ELFT> *In<ELFT>::DynStrTab; +template <class ELFT> SymbolTableSection<ELFT> *In<ELFT>::DynSymTab; +template <class ELFT> EhFrameHeader<ELFT> *In<ELFT>::EhFrameHdr; +template <class ELFT> GdbIndexSection<ELFT> *In<ELFT>::GdbIndex; +template <class ELFT> GnuHashTableSection<ELFT> *In<ELFT>::GnuHashTab; +template <class ELFT> GotSection<ELFT> *In<ELFT>::Got; +template <class ELFT> MipsGotSection<ELFT> *In<ELFT>::MipsGot; +template <class ELFT> GotPltSection<ELFT> *In<ELFT>::GotPlt; +template <class ELFT> IgotPltSection<ELFT> 
*In<ELFT>::IgotPlt; +template <class ELFT> HashTableSection<ELFT> *In<ELFT>::HashTab; +template <class ELFT> InputSection<ELFT> *In<ELFT>::Interp; +template <class ELFT> MipsRldMapSection<ELFT> *In<ELFT>::MipsRldMap; +template <class ELFT> PltSection<ELFT> *In<ELFT>::Plt; +template <class ELFT> IpltSection<ELFT> *In<ELFT>::Iplt; +template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaDyn; +template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaPlt; +template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaIplt; +template <class ELFT> StringTableSection<ELFT> *In<ELFT>::ShStrTab; +template <class ELFT> StringTableSection<ELFT> *In<ELFT>::StrTab; +template <class ELFT> SymbolTableSection<ELFT> *In<ELFT>::SymTab; +template <class ELFT> VersionDefinitionSection<ELFT> *In<ELFT>::VerDef; +template <class ELFT> VersionTableSection<ELFT> *In<ELFT>::VerSym; +template <class ELFT> VersionNeedSection<ELFT> *In<ELFT>::VerNeed; +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Target.cpp b/contrib/llvm/tools/lld/ELF/Target.cpp new file mode 100644 index 000000000000..cb2b178fa849 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Target.cpp @@ -0,0 +1,2351 @@ +//===- Target.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Machine-specific things, such as applying relocations, creation of +// GOT or PLT entries, etc., are handled in this file. +// +// Refer the ELF spec for the single letter variables, S, A or P, used +// in this file. +// +// Some functions defined in this file has "relaxTls" as part of their names. +// They do peephole optimization for TLS variables by rewriting instructions. 
+// They are not part of the ABI but optional optimization, so you can skip +// them if you are not interested in how TLS variables are optimized. +// See the following paper for the details. +// +// Ulrich Drepper, ELF Handling For Thread-Local Storage +// http://www.akkadia.org/drepper/tls.pdf +// +//===----------------------------------------------------------------------===// + +#include "Target.h" +#include "Error.h" +#include "InputFiles.h" +#include "Memory.h" +#include "OutputSections.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Thunks.h" +#include "Writer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; + +std::string lld::toString(uint32_t Type) { + return getELFRelocationTypeName(elf::Config->EMachine, Type); +} + +namespace lld { +namespace elf { + +TargetInfo *Target; + +static void or32le(uint8_t *P, int32_t V) { write32le(P, read32le(P) | V); } +static void or32be(uint8_t *P, int32_t V) { write32be(P, read32be(P) | V); } + +template <class ELFT> static std::string getErrorLoc(uint8_t *Loc) { + for (InputSectionData *D : Symtab<ELFT>::X->Sections) { + auto *IS = dyn_cast_or_null<InputSection<ELFT>>(D); + if (!IS || !IS->OutSec) + continue; + + uint8_t *ISLoc = cast<OutputSection<ELFT>>(IS->OutSec)->Loc + IS->OutSecOff; + if (ISLoc <= Loc && Loc < ISLoc + IS->getSize()) + return IS->getLocation(Loc - ISLoc) + ": "; + } + return ""; +} + +static std::string getErrorLocation(uint8_t *Loc) { + switch (Config->EKind) { + case ELF32LEKind: + return getErrorLoc<ELF32LE>(Loc); + case ELF32BEKind: + return getErrorLoc<ELF32BE>(Loc); + case ELF64LEKind: + return getErrorLoc<ELF64LE>(Loc); + case ELF64BEKind: + return getErrorLoc<ELF64BE>(Loc); + default: + llvm_unreachable("unknown ELF type"); + } +} + +template 
<unsigned N> +static void checkInt(uint8_t *Loc, int64_t V, uint32_t Type) { + if (!isInt<N>(V)) + error(getErrorLocation(Loc) + "relocation " + toString(Type) + + " out of range"); +} + +template <unsigned N> +static void checkUInt(uint8_t *Loc, uint64_t V, uint32_t Type) { + if (!isUInt<N>(V)) + error(getErrorLocation(Loc) + "relocation " + toString(Type) + + " out of range"); +} + +template <unsigned N> +static void checkIntUInt(uint8_t *Loc, uint64_t V, uint32_t Type) { + if (!isInt<N>(V) && !isUInt<N>(V)) + error(getErrorLocation(Loc) + "relocation " + toString(Type) + + " out of range"); +} + +template <unsigned N> +static void checkAlignment(uint8_t *Loc, uint64_t V, uint32_t Type) { + if ((V & (N - 1)) != 0) + error(getErrorLocation(Loc) + "improper alignment for relocation " + + toString(Type)); +} + +namespace { +class X86TargetInfo final : public TargetInfo { +public: + X86TargetInfo(); + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; + void writeGotPltHeader(uint8_t *Buf) const override; + uint32_t getDynRel(uint32_t Type) const override; + bool isTlsLocalDynamicRel(uint32_t Type) const override; + bool isTlsGlobalDynamicRel(uint32_t Type) const override; + bool isTlsInitialExecRel(uint32_t Type) const override; + void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writePltHeader(uint8_t *Buf) const override; + void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + + RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const override; + void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const 
override; + void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; +}; + +template <class ELFT> class X86_64TargetInfo final : public TargetInfo { +public: + X86_64TargetInfo(); + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + bool isPicRel(uint32_t Type) const override; + bool isTlsLocalDynamicRel(uint32_t Type) const override; + bool isTlsGlobalDynamicRel(uint32_t Type) const override; + bool isTlsInitialExecRel(uint32_t Type) const override; + void writeGotPltHeader(uint8_t *Buf) const override; + void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writePltHeader(uint8_t *Buf) const override; + void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + + RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const override; + void relaxGot(uint8_t *Loc, uint64_t Val) const override; + void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + +private: + void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op, + uint8_t ModRm) const; +}; + +class PPCTargetInfo final : public TargetInfo { +public: + PPCTargetInfo(); + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; +}; + +class PPC64TargetInfo final : public TargetInfo { +public: + PPC64TargetInfo(); + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t 
PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; +}; + +class AArch64TargetInfo final : public TargetInfo { +public: + AArch64TargetInfo(); + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + bool isPicRel(uint32_t Type) const override; + bool isTlsInitialExecRel(uint32_t Type) const override; + void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writePltHeader(uint8_t *Buf) const override; + void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + bool usesOnlyLowPageBits(uint32_t Type) const override; + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const override; + void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; +}; + +class AMDGPUTargetInfo final : public TargetInfo { +public: + AMDGPUTargetInfo(); + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; +}; + +class ARMTargetInfo final : public TargetInfo { +public: + ARMTargetInfo(); + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + bool isPicRel(uint32_t Type) const override; + uint32_t getDynRel(uint32_t Type) const override; + uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; + bool isTlsLocalDynamicRel(uint32_t Type) const override; + bool isTlsGlobalDynamicRel(uint32_t Type) const override; + bool isTlsInitialExecRel(uint32_t Type) const override; + void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writeIgotPlt(uint8_t *Buf, const 
SymbolBody &S) const override; + void writePltHeader(uint8_t *Buf) const override; + void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File, + const SymbolBody &S) const override; + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; +}; + +template <class ELFT> class MipsTargetInfo final : public TargetInfo { +public: + MipsTargetInfo(); + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; + bool isPicRel(uint32_t Type) const override; + uint32_t getDynRel(uint32_t Type) const override; + bool isTlsLocalDynamicRel(uint32_t Type) const override; + bool isTlsGlobalDynamicRel(uint32_t Type) const override; + void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writePltHeader(uint8_t *Buf) const override; + void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File, + const SymbolBody &S) const override; + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + bool usesOnlyLowPageBits(uint32_t Type) const override; +}; +} // anonymous namespace + +TargetInfo *createTarget() { + switch (Config->EMachine) { + case EM_386: + case EM_IAMCU: + return make<X86TargetInfo>(); + case EM_AARCH64: + return make<AArch64TargetInfo>(); + case EM_AMDGPU: + return make<AMDGPUTargetInfo>(); + case EM_ARM: + return make<ARMTargetInfo>(); + case EM_MIPS: + switch (Config->EKind) { + case ELF32LEKind: + return make<MipsTargetInfo<ELF32LE>>(); + case ELF32BEKind: + return make<MipsTargetInfo<ELF32BE>>(); + case ELF64LEKind: + return make<MipsTargetInfo<ELF64LE>>(); + case ELF64BEKind: + return make<MipsTargetInfo<ELF64BE>>(); + default: 
+ fatal("unsupported MIPS target"); + } + case EM_PPC: + return make<PPCTargetInfo>(); + case EM_PPC64: + return make<PPC64TargetInfo>(); + case EM_X86_64: + if (Config->EKind == ELF32LEKind) + return make<X86_64TargetInfo<ELF32LE>>(); + return make<X86_64TargetInfo<ELF64LE>>(); + } + fatal("unknown target machine"); +} + +TargetInfo::~TargetInfo() {} + +uint64_t TargetInfo::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { + return 0; +} + +bool TargetInfo::usesOnlyLowPageBits(uint32_t Type) const { return false; } + +RelExpr TargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, + const InputFile &File, + const SymbolBody &S) const { + return Expr; +} + +bool TargetInfo::isTlsInitialExecRel(uint32_t Type) const { return false; } + +bool TargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { return false; } + +bool TargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { return false; } + +void TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { + writeGotPlt(Buf, S); +} + +RelExpr TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const { + return Expr; +} + +void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { + llvm_unreachable("Should not have claimed to be relaxable"); +} + +void TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + llvm_unreachable("Should not have claimed to be relaxable"); +} + +void TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + llvm_unreachable("Should not have claimed to be relaxable"); +} + +void TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + llvm_unreachable("Should not have claimed to be relaxable"); +} + +void TargetInfo::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + llvm_unreachable("Should not have claimed to be relaxable"); +} + +X86TargetInfo::X86TargetInfo() { + CopyRel = R_386_COPY; + GotRel = R_386_GLOB_DAT; + PltRel = R_386_JUMP_SLOT; 
+ IRelativeRel = R_386_IRELATIVE; + RelativeRel = R_386_RELATIVE; + TlsGotRel = R_386_TLS_TPOFF; + TlsModuleIndexRel = R_386_TLS_DTPMOD32; + TlsOffsetRel = R_386_TLS_DTPOFF32; + GotEntrySize = 4; + GotPltEntrySize = 4; + PltEntrySize = 16; + PltHeaderSize = 16; + TlsGdRelaxSkip = 2; +} + +RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_386_TLS_GD: + return R_TLSGD; + case R_386_TLS_LDM: + return R_TLSLD; + case R_386_PLT32: + return R_PLT_PC; + case R_386_PC16: + case R_386_PC32: + return R_PC; + case R_386_GOTPC: + return R_GOTONLY_PC_FROM_END; + case R_386_TLS_IE: + return R_GOT; + case R_386_GOT32: + case R_386_GOT32X: + case R_386_TLS_GOTIE: + return R_GOT_FROM_END; + case R_386_GOTOFF: + return R_GOTREL_FROM_END; + case R_386_TLS_LE: + return R_TLS; + case R_386_TLS_LE_32: + return R_NEG_TLS; + } +} + +RelExpr X86TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const { + switch (Expr) { + default: + return Expr; + case R_RELAX_TLS_GD_TO_IE: + return R_RELAX_TLS_GD_TO_IE_END; + case R_RELAX_TLS_GD_TO_LE: + return R_RELAX_TLS_GD_TO_LE_NEG; + } +} + +void X86TargetInfo::writeGotPltHeader(uint8_t *Buf) const { + write32le(Buf, In<ELF32LE>::Dynamic->getVA()); +} + +void X86TargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const { + // Entries in .got.plt initially points back to the corresponding + // PLT entries with a fixed offset to skip the first instruction. + write32le(Buf, S.getPltVA<ELF32LE>() + 6); +} + +void X86TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { + // An x86 entry is the address of the ifunc resolver function. 
+ write32le(Buf, S.getVA<ELF32LE>()); +} + +uint32_t X86TargetInfo::getDynRel(uint32_t Type) const { + if (Type == R_386_TLS_LE) + return R_386_TLS_TPOFF; + if (Type == R_386_TLS_LE_32) + return R_386_TLS_TPOFF32; + return Type; +} + +bool X86TargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { + return Type == R_386_TLS_GD; +} + +bool X86TargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { + return Type == R_386_TLS_LDO_32 || Type == R_386_TLS_LDM; +} + +bool X86TargetInfo::isTlsInitialExecRel(uint32_t Type) const { + return Type == R_386_TLS_IE || Type == R_386_TLS_GOTIE; +} + +void X86TargetInfo::writePltHeader(uint8_t *Buf) const { + // Executable files and shared object files have + // separate procedure linkage tables. + if (Config->Pic) { + const uint8_t V[] = { + 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx) + 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx) + 0x90, 0x90, 0x90, 0x90 // nop; nop; nop; nop + }; + memcpy(Buf, V, sizeof(V)); + return; + } + + const uint8_t PltData[] = { + 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushl (GOT+4) + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *(GOT+8) + 0x90, 0x90, 0x90, 0x90 // nop; nop; nop; nop + }; + memcpy(Buf, PltData, sizeof(PltData)); + uint32_t Got = In<ELF32LE>::GotPlt->getVA(); + write32le(Buf + 2, Got + 4); + write32le(Buf + 8, Got + 8); +} + +void X86TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const uint8_t Inst[] = { + 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, // jmp *foo_in_GOT|*foo@GOT(%ebx) + 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $reloc_offset + 0xe9, 0x00, 0x00, 0x00, 0x00 // jmp .PLT0@PC + }; + memcpy(Buf, Inst, sizeof(Inst)); + + // jmp *foo@GOT(%ebx) or jmp *foo_in_GOT + Buf[1] = Config->Pic ? 0xa3 : 0x25; + uint32_t Got = In<ELF32LE>::GotPlt->getVA(); + write32le(Buf + 2, Config->Shared ? 
GotEntryAddr - Got : GotEntryAddr); + write32le(Buf + 7, RelOff); + write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16); +} + +uint64_t X86TargetInfo::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { + switch (Type) { + default: + return 0; + case R_386_16: + case R_386_PC16: + return read16le(Buf); + case R_386_32: + case R_386_GOT32: + case R_386_GOT32X: + case R_386_GOTOFF: + case R_386_GOTPC: + case R_386_PC32: + case R_386_PLT32: + case R_386_TLS_LE: + return read32le(Buf); + } +} + +void X86TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + checkInt<32>(Loc, Val, Type); + + // R_386_PC16 and R_386_16 are not part of the current i386 psABI. They are + // used by 16-bit x86 objects, like boot loaders. + if (Type == R_386_16 || Type == R_386_PC16) { + write16le(Loc, Val); + return; + } + write32le(Loc, Val); +} + +void X86TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // leal x@tlsgd(, %ebx, 1), + // call __tls_get_addr@plt + // to + // movl %gs:0,%eax + // subl $x@ntpoff,%eax + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax + 0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 // subl 0(%ebx), %eax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); + relocateOne(Loc + 5, R_386_32, Val); +} + +void X86TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // leal x@tlsgd(, %ebx, 1), + // call __tls_get_addr@plt + // to + // movl %gs:0, %eax + // addl x@gotntpoff(%ebx), %eax + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax + 0x03, 0x83, 0x00, 0x00, 0x00, 0x00 // addl 0(%ebx), %eax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); + relocateOne(Loc + 5, R_386_32, Val); +} + +// In some conditions, relocations can be optimized to avoid using GOT. +// This function does that for Initial Exec to Local Exec case. 
+void X86TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Ulrich's document section 6.2 says that @gotntpoff can + // be used with MOVL or ADDL instructions. + // @indntpoff is similar to @gotntpoff, but for use in + // position dependent code. + uint8_t Reg = (Loc[-1] >> 3) & 7; + + if (Type == R_386_TLS_IE) { + if (Loc[-1] == 0xa1) { + // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" + // This case is different from the generic case below because + // this is a 5 byte instruction while below is 6 bytes. + Loc[-1] = 0xb8; + } else if (Loc[-2] == 0x8b) { + // "movl foo@indntpoff,%reg" -> "movl $foo,%reg" + Loc[-2] = 0xc7; + Loc[-1] = 0xc0 | Reg; + } else { + // "addl foo@indntpoff,%reg" -> "addl $foo,%reg" + Loc[-2] = 0x81; + Loc[-1] = 0xc0 | Reg; + } + } else { + assert(Type == R_386_TLS_GOTIE); + if (Loc[-2] == 0x8b) { + // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" + Loc[-2] = 0xc7; + Loc[-1] = 0xc0 | Reg; + } else { + // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg" + Loc[-2] = 0x8d; + Loc[-1] = 0x80 | (Reg << 3) | Reg; + } + } + relocateOne(Loc, R_386_TLS_LE, Val); +} + +void X86TargetInfo::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + if (Type == R_386_TLS_LDO_32) { + relocateOne(Loc, R_386_TLS_LE, Val); + return; + } + + // Convert + // leal foo(%reg),%eax + // call ___tls_get_addr + // to + // movl %gs:0,%eax + // nop + // leal 0(%esi,1),%esi + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax + 0x90, // nop + 0x8d, 0x74, 0x26, 0x00 // leal 0(%esi,1),%esi + }; + memcpy(Loc - 2, Inst, sizeof(Inst)); +} + +template <class ELFT> X86_64TargetInfo<ELFT>::X86_64TargetInfo() { + CopyRel = R_X86_64_COPY; + GotRel = R_X86_64_GLOB_DAT; + PltRel = R_X86_64_JUMP_SLOT; + RelativeRel = R_X86_64_RELATIVE; + IRelativeRel = R_X86_64_IRELATIVE; + TlsGotRel = R_X86_64_TPOFF64; + TlsModuleIndexRel = R_X86_64_DTPMOD64; + TlsOffsetRel = R_X86_64_DTPOFF64; + GotEntrySize 
= 8; + GotPltEntrySize = 8; + PltEntrySize = 16; + PltHeaderSize = 16; + TlsGdRelaxSkip = 2; + // Align to the large page size (known as a superpage or huge page). + // FreeBSD automatically promotes large, superpage-aligned allocations. + DefaultImageBase = 0x200000; +} + +template <class ELFT> +RelExpr X86_64TargetInfo<ELFT>::getRelExpr(uint32_t Type, + const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_X86_64_TPOFF32: + return R_TLS; + case R_X86_64_TLSLD: + return R_TLSLD_PC; + case R_X86_64_TLSGD: + return R_TLSGD_PC; + case R_X86_64_SIZE32: + case R_X86_64_SIZE64: + return R_SIZE; + case R_X86_64_PLT32: + return R_PLT_PC; + case R_X86_64_PC32: + case R_X86_64_PC64: + return R_PC; + case R_X86_64_GOT32: + case R_X86_64_GOT64: + return R_GOT_FROM_END; + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCRELX: + case R_X86_64_REX_GOTPCRELX: + case R_X86_64_GOTTPOFF: + return R_GOT_PC; + case R_X86_64_NONE: + return R_HINT; + } +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::writeGotPltHeader(uint8_t *Buf) const { + // The first entry holds the value of _DYNAMIC. It is not clear why that is + // required, but it is documented in the psabi and the glibc dynamic linker + // seems to use it (note that this is relevant for linking ld.so, not any + // other program). + write64le(Buf, In<ELFT>::Dynamic->getVA()); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::writeGotPlt(uint8_t *Buf, + const SymbolBody &S) const { + // See comments in X86TargetInfo::writeGotPlt. 
+ write32le(Buf, S.getPltVA<ELFT>() + 6); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const { + const uint8_t PltData[] = { + 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOT+8(%rip) + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOT+16(%rip) + 0x0f, 0x1f, 0x40, 0x00 // nopl 0x0(rax) + }; + memcpy(Buf, PltData, sizeof(PltData)); + uint64_t Got = In<ELFT>::GotPlt->getVA(); + uint64_t Plt = In<ELFT>::Plt->getVA(); + write32le(Buf + 2, Got - Plt + 2); // GOT+8 + write32le(Buf + 8, Got - Plt + 4); // GOT+16 +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const uint8_t Inst[] = { + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmpq *got(%rip) + 0x68, 0x00, 0x00, 0x00, 0x00, // pushq <relocation index> + 0xe9, 0x00, 0x00, 0x00, 0x00 // jmpq plt[0] + }; + memcpy(Buf, Inst, sizeof(Inst)); + + write32le(Buf + 2, GotEntryAddr - PltEntryAddr - 6); + write32le(Buf + 7, Index); + write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16); +} + +template <class ELFT> +bool X86_64TargetInfo<ELFT>::isPicRel(uint32_t Type) const { + return Type != R_X86_64_PC32 && Type != R_X86_64_32; +} + +template <class ELFT> +bool X86_64TargetInfo<ELFT>::isTlsInitialExecRel(uint32_t Type) const { + return Type == R_X86_64_GOTTPOFF; +} + +template <class ELFT> +bool X86_64TargetInfo<ELFT>::isTlsGlobalDynamicRel(uint32_t Type) const { + return Type == R_X86_64_TLSGD; +} + +template <class ELFT> +bool X86_64TargetInfo<ELFT>::isTlsLocalDynamicRel(uint32_t Type) const { + return Type == R_X86_64_DTPOFF32 || Type == R_X86_64_DTPOFF64 || + Type == R_X86_64_TLSLD; +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // .byte 0x66 + // leaq x@tlsgd(%rip), %rdi + // .word 0x6666 + // rex64 + // call __tls_get_addr@plt + // to + // mov %fs:0x0,%rax + // lea 
x@tpoff,%rax + const uint8_t Inst[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax + 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff,%rax + }; + memcpy(Loc - 4, Inst, sizeof(Inst)); + // The original code used a pc relative relocation and so we have to + // compensate for the -4 in had in the addend. + relocateOne(Loc + 8, R_X86_64_TPOFF32, Val + 4); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // .byte 0x66 + // leaq x@tlsgd(%rip), %rdi + // .word 0x6666 + // rex64 + // call __tls_get_addr@plt + // to + // mov %fs:0x0,%rax + // addq x@tpoff,%rax + const uint8_t Inst[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax + 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // addq x@tpoff,%rax + }; + memcpy(Loc - 4, Inst, sizeof(Inst)); + // Both code sequences are PC relatives, but since we are moving the constant + // forward by 8 bytes we have to subtract the value by 8. + relocateOne(Loc + 8, R_X86_64_PC32, Val - 8); +} + +// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to +// R_X86_64_TPOFF32 so that it does not use GOT. +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + uint8_t *Inst = Loc - 3; + uint8_t Reg = Loc[-1] >> 3; + uint8_t *RegSlot = Loc - 1; + + // Note that ADD with RSP or R12 is converted to ADD instead of LEA + // because LEA with these registers needs 4 bytes to encode and thus + // wouldn't fit the space. 
+ + if (memcmp(Inst, "\x48\x03\x25", 3) == 0) { + // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp" + memcpy(Inst, "\x48\x81\xc4", 3); + } else if (memcmp(Inst, "\x4c\x03\x25", 3) == 0) { + // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12" + memcpy(Inst, "\x49\x81\xc4", 3); + } else if (memcmp(Inst, "\x4c\x03", 2) == 0) { + // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]" + memcpy(Inst, "\x4d\x8d", 2); + *RegSlot = 0x80 | (Reg << 3) | Reg; + } else if (memcmp(Inst, "\x48\x03", 2) == 0) { + // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg" + memcpy(Inst, "\x48\x8d", 2); + *RegSlot = 0x80 | (Reg << 3) | Reg; + } else if (memcmp(Inst, "\x4c\x8b", 2) == 0) { + // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]" + memcpy(Inst, "\x49\xc7", 2); + *RegSlot = 0xc0 | Reg; + } else if (memcmp(Inst, "\x48\x8b", 2) == 0) { + // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg" + memcpy(Inst, "\x48\xc7", 2); + *RegSlot = 0xc0 | Reg; + } else { + error(getErrorLocation(Loc - 3) + + "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only"); + } + + // The original code used a PC relative relocation. + // Need to compensate for the -4 it had in the addend. 
+ relocateOne(Loc, R_X86_64_TPOFF32, Val + 4); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // leaq bar@tlsld(%rip), %rdi + // callq __tls_get_addr@PLT + // leaq bar@dtpoff(%rax), %rcx + // to + // .word 0x6666 + // .byte 0x66 + // mov %fs:0,%rax + // leaq bar@tpoff(%rax), %rcx + if (Type == R_X86_64_DTPOFF64) { + write64le(Loc, Val); + return; + } + if (Type == R_X86_64_DTPOFF32) { + relocateOne(Loc, R_X86_64_TPOFF32, Val); + return; + } + + const uint8_t Inst[] = { + 0x66, 0x66, // .word 0x6666 + 0x66, // .byte 0x66 + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + switch (Type) { + case R_X86_64_32: + checkUInt<32>(Loc, Val, Type); + write32le(Loc, Val); + break; + case R_X86_64_32S: + case R_X86_64_TPOFF32: + case R_X86_64_GOT32: + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCRELX: + case R_X86_64_REX_GOTPCRELX: + case R_X86_64_PC32: + case R_X86_64_GOTTPOFF: + case R_X86_64_PLT32: + case R_X86_64_TLSGD: + case R_X86_64_TLSLD: + case R_X86_64_DTPOFF32: + case R_X86_64_SIZE32: + checkInt<32>(Loc, Val, Type); + write32le(Loc, Val); + break; + case R_X86_64_64: + case R_X86_64_DTPOFF64: + case R_X86_64_GLOB_DAT: + case R_X86_64_PC64: + case R_X86_64_SIZE64: + case R_X86_64_GOT64: + write64le(Loc, Val); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +template <class ELFT> +RelExpr X86_64TargetInfo<ELFT>::adjustRelaxExpr(uint32_t Type, + const uint8_t *Data, + RelExpr RelExpr) const { + if (Type != R_X86_64_GOTPCRELX && Type != R_X86_64_REX_GOTPCRELX) + return RelExpr; + const uint8_t Op = Data[-2]; + const uint8_t ModRm = Data[-1]; + // FIXME: When PIC is disabled and foo is defined locally in the + // lower 32 bit address space, 
memory operand in mov can be converted into + // immediate operand. Otherwise, mov must be changed to lea. We support only + // latter relaxation at this moment. + if (Op == 0x8b) + return R_RELAX_GOT_PC; + // Relax call and jmp. + if (Op == 0xff && (ModRm == 0x15 || ModRm == 0x25)) + return R_RELAX_GOT_PC; + + // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor. + // If PIC then no relaxation is available. + // We also don't relax test/binop instructions without REX byte, + // they are 32bit operations and not common to have. + assert(Type == R_X86_64_REX_GOTPCRELX); + return Config->Pic ? RelExpr : R_RELAX_GOT_PC_NOPIC; +} + +// A subset of relaxations can only be applied for no-PIC. This method +// handles such relaxations. Instructions encoding information was taken from: +// "Intel 64 and IA-32 Architectures Software Developer's Manual V2" +// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/ +// 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf) +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxGotNoPic(uint8_t *Loc, uint64_t Val, + uint8_t Op, uint8_t ModRm) const { + const uint8_t Rex = Loc[-3]; + // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg". + if (Op == 0x85) { + // See "TEST-Logical Compare" (4-428 Vol. 2B), + // TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension). + + // ModR/M byte has form XX YYY ZZZ, where + // YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1). + // XX has different meanings: + // 00: The operand's memory address is in reg1. + // 01: The operand's memory address is reg1 + a byte-sized displacement. + // 10: The operand's memory address is reg1 + a word-sized displacement. + // 11: The operand is reg1 itself. + // If an instruction requires only one operand, the unused reg2 field + // holds extra opcode bits rather than a register code + // 0xC0 == 11 000 000 binary. + // 0x38 == 00 111 000 binary. 
+ // We transfer reg2 to reg1 here as operand. + // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3). + Loc[-1] = 0xc0 | (ModRm & 0x38) >> 3; // ModR/M byte. + + // Change opcode from TEST r/m64, r64 to TEST r/m64, imm32 + // See "TEST-Logical Compare" (4-428 Vol. 2B). + Loc[-2] = 0xf7; + + // Move R bit to the B bit in REX byte. + // REX byte is encoded as 0100WRXB, where + // 0100 is 4bit fixed pattern. + // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the + // default operand size is used (which is 32-bit for most but not all + // instructions). + // REX.R This 1-bit value is an extension to the MODRM.reg field. + // REX.X This 1-bit value is an extension to the SIB.index field. + // REX.B This 1-bit value is an extension to the MODRM.rm field or the + // SIB.base field. + // See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A). + Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2; + relocateOne(Loc, R_X86_64_PC32, Val); + return; + } + + // If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub + // or xor operations. + + // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg". + // Logic is close to one for test instruction above, but we also + // write opcode extension here, see below for details. + Loc[-1] = 0xc0 | (ModRm & 0x38) >> 3 | (Op & 0x3c); // ModR/M byte. + + // Primary opcode is 0x81, opcode extension is one of: + // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB, + // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP. + // This value was wrote to MODRM.reg in a line above. + // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15), + // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for + // descriptions about each operation. 
+ Loc[-2] = 0x81; + Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2; + relocateOne(Loc, R_X86_64_PC32, Val); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxGot(uint8_t *Loc, uint64_t Val) const { + const uint8_t Op = Loc[-2]; + const uint8_t ModRm = Loc[-1]; + + // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg". + if (Op == 0x8b) { + Loc[-2] = 0x8d; + relocateOne(Loc, R_X86_64_PC32, Val); + return; + } + + if (Op != 0xff) { + // We are relaxing a rip relative to an absolute, so compensate + // for the old -4 addend. + assert(!Config->Pic); + relaxGotNoPic(Loc, Val + 4, Op, ModRm); + return; + } + + // Convert call/jmp instructions. + if (ModRm == 0x15) { + // ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call foo". + // Instead we convert to "addr32 call foo" where addr32 is an instruction + // prefix. That makes result expression to be a single instruction. + Loc[-2] = 0x67; // addr32 prefix + Loc[-1] = 0xe8; // call + relocateOne(Loc, R_X86_64_PC32, Val); + return; + } + + // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop". + // jmp doesn't return, so it is fine to use nop here, it is just a stub. + assert(ModRm == 0x25); + Loc[-2] = 0xe9; // jmp + Loc[3] = 0x90; // nop + relocateOne(Loc - 1, R_X86_64_PC32, Val + 1); +} + +// Relocation masks following the #lo(value), #hi(value), #ha(value), +// #higher(value), #highera(value), #highest(value), and #highesta(value) +// macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi +// document. 
+static uint16_t applyPPCLo(uint64_t V) { return V; } +static uint16_t applyPPCHi(uint64_t V) { return V >> 16; } +static uint16_t applyPPCHa(uint64_t V) { return (V + 0x8000) >> 16; } +static uint16_t applyPPCHigher(uint64_t V) { return V >> 32; } +static uint16_t applyPPCHighera(uint64_t V) { return (V + 0x8000) >> 32; } +static uint16_t applyPPCHighest(uint64_t V) { return V >> 48; } +static uint16_t applyPPCHighesta(uint64_t V) { return (V + 0x8000) >> 48; } + +PPCTargetInfo::PPCTargetInfo() {} + +void PPCTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + switch (Type) { + case R_PPC_ADDR16_HA: + write16be(Loc, applyPPCHa(Val)); + break; + case R_PPC_ADDR16_LO: + write16be(Loc, applyPPCLo(Val)); + break; + case R_PPC_ADDR32: + case R_PPC_REL32: + write32be(Loc, Val); + break; + case R_PPC_REL24: + or32be(Loc, Val & 0x3FFFFFC); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +RelExpr PPCTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { + switch (Type) { + case R_PPC_REL24: + case R_PPC_REL32: + return R_PC; + default: + return R_ABS; + } +} + +PPC64TargetInfo::PPC64TargetInfo() { + PltRel = GotRel = R_PPC64_GLOB_DAT; + RelativeRel = R_PPC64_RELATIVE; + GotEntrySize = 8; + GotPltEntrySize = 8; + PltEntrySize = 32; + PltHeaderSize = 0; + + // We need 64K pages (at least under glibc/Linux, the loader won't + // set different permissions on a finer granularity than that). + DefaultMaxPageSize = 65536; + + // The PPC64 ELF ABI v1 spec, says: + // + // It is normally desirable to put segments with different characteristics + // in separate 256 Mbyte portions of the address space, to give the + // operating system full paging flexibility in the 64-bit address space. + // + // And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers + // use 0x10000000 as the starting address. 
+ DefaultImageBase = 0x10000000; +} + +static uint64_t PPC64TocOffset = 0x8000; + +uint64_t getPPC64TocBase() { + // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The + // TOC starts where the first of these sections starts. We always create a + // .got when we see a relocation that uses it, so for us the start is always + // the .got. + uint64_t TocVA = In<ELF64BE>::Got->getVA(); + + // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000 + // thus permitting a full 64 Kbytes segment. Note that the glibc startup + // code (crt1.o) assumes that you can get from the TOC base to the + // start of the .toc section with only a single (signed) 16-bit relocation. + return TocVA + PPC64TocOffset; +} + +RelExpr PPC64TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_PPC64_TOC16: + case R_PPC64_TOC16_DS: + case R_PPC64_TOC16_HA: + case R_PPC64_TOC16_HI: + case R_PPC64_TOC16_LO: + case R_PPC64_TOC16_LO_DS: + return R_GOTREL; + case R_PPC64_TOC: + return R_PPC_TOC; + case R_PPC64_REL24: + return R_PPC_PLT_OPD; + } +} + +void PPC64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + uint64_t Off = GotEntryAddr - getPPC64TocBase(); + + // FIXME: What we should do, in theory, is get the offset of the function + // descriptor in the .opd section, and use that as the offset from %r2 (the + // TOC-base pointer). Instead, we have the GOT-entry offset, and that will + // be a pointer to the function descriptor in the .opd section. Using + // this scheme is simpler, but requires an extra indirection per PLT dispatch. 
+ + write32be(Buf, 0xf8410028); // std %r2, 40(%r1) + write32be(Buf + 4, 0x3d620000 | applyPPCHa(Off)); // addis %r11, %r2, X@ha + write32be(Buf + 8, 0xe98b0000 | applyPPCLo(Off)); // ld %r12, X@l(%r11) + write32be(Buf + 12, 0xe96c0000); // ld %r11,0(%r12) + write32be(Buf + 16, 0x7d6903a6); // mtctr %r11 + write32be(Buf + 20, 0xe84c0008); // ld %r2,8(%r12) + write32be(Buf + 24, 0xe96c0010); // ld %r11,16(%r12) + write32be(Buf + 28, 0x4e800420); // bctr +} + +static std::pair<uint32_t, uint64_t> toAddr16Rel(uint32_t Type, uint64_t Val) { + uint64_t V = Val - PPC64TocOffset; + switch (Type) { + case R_PPC64_TOC16: + return {R_PPC64_ADDR16, V}; + case R_PPC64_TOC16_DS: + return {R_PPC64_ADDR16_DS, V}; + case R_PPC64_TOC16_HA: + return {R_PPC64_ADDR16_HA, V}; + case R_PPC64_TOC16_HI: + return {R_PPC64_ADDR16_HI, V}; + case R_PPC64_TOC16_LO: + return {R_PPC64_ADDR16_LO, V}; + case R_PPC64_TOC16_LO_DS: + return {R_PPC64_ADDR16_LO_DS, V}; + default: + return {Type, Val}; + } +} + +void PPC64TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // For a TOC-relative relocation, proceed in terms of the corresponding + // ADDR16 relocation type. 
+ std::tie(Type, Val) = toAddr16Rel(Type, Val); + + switch (Type) { + case R_PPC64_ADDR14: { + checkAlignment<4>(Loc, Val, Type); + // Preserve the AA/LK bits in the branch instruction + uint8_t AALK = Loc[3]; + write16be(Loc + 2, (AALK & 3) | (Val & 0xfffc)); + break; + } + case R_PPC64_ADDR16: + checkInt<16>(Loc, Val, Type); + write16be(Loc, Val); + break; + case R_PPC64_ADDR16_DS: + checkInt<16>(Loc, Val, Type); + write16be(Loc, (read16be(Loc) & 3) | (Val & ~3)); + break; + case R_PPC64_ADDR16_HA: + case R_PPC64_REL16_HA: + write16be(Loc, applyPPCHa(Val)); + break; + case R_PPC64_ADDR16_HI: + case R_PPC64_REL16_HI: + write16be(Loc, applyPPCHi(Val)); + break; + case R_PPC64_ADDR16_HIGHER: + write16be(Loc, applyPPCHigher(Val)); + break; + case R_PPC64_ADDR16_HIGHERA: + write16be(Loc, applyPPCHighera(Val)); + break; + case R_PPC64_ADDR16_HIGHEST: + write16be(Loc, applyPPCHighest(Val)); + break; + case R_PPC64_ADDR16_HIGHESTA: + write16be(Loc, applyPPCHighesta(Val)); + break; + case R_PPC64_ADDR16_LO: + write16be(Loc, applyPPCLo(Val)); + break; + case R_PPC64_ADDR16_LO_DS: + case R_PPC64_REL16_LO: + write16be(Loc, (read16be(Loc) & 3) | (applyPPCLo(Val) & ~3)); + break; + case R_PPC64_ADDR32: + case R_PPC64_REL32: + checkInt<32>(Loc, Val, Type); + write32be(Loc, Val); + break; + case R_PPC64_ADDR64: + case R_PPC64_REL64: + case R_PPC64_TOC: + write64be(Loc, Val); + break; + case R_PPC64_REL24: { + uint32_t Mask = 0x03FFFFFC; + checkInt<24>(Loc, Val, Type); + write32be(Loc, (read32be(Loc) & ~Mask) | (Val & Mask)); + break; + } + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +AArch64TargetInfo::AArch64TargetInfo() { + CopyRel = R_AARCH64_COPY; + RelativeRel = R_AARCH64_RELATIVE; + IRelativeRel = R_AARCH64_IRELATIVE; + GotRel = R_AARCH64_GLOB_DAT; + PltRel = R_AARCH64_JUMP_SLOT; + TlsDescRel = R_AARCH64_TLSDESC; + TlsGotRel = R_AARCH64_TLS_TPREL64; + GotEntrySize = 8; + GotPltEntrySize = 8; + PltEntrySize = 16; + PltHeaderSize = 
32; + DefaultMaxPageSize = 65536; + + // It doesn't seem to be documented anywhere, but tls on aarch64 uses variant + // 1 of the tls structures and the tcb size is 16. + TcbSize = 16; +} + +RelExpr AArch64TargetInfo::getRelExpr(uint32_t Type, + const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_AARCH64_TLSDESC_ADR_PAGE21: + return R_TLSDESC_PAGE; + case R_AARCH64_TLSDESC_LD64_LO12_NC: + case R_AARCH64_TLSDESC_ADD_LO12_NC: + return R_TLSDESC; + case R_AARCH64_TLSDESC_CALL: + return R_TLSDESC_CALL; + case R_AARCH64_TLSLE_ADD_TPREL_HI12: + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + return R_TLS; + case R_AARCH64_CALL26: + case R_AARCH64_CONDBR19: + case R_AARCH64_JUMP26: + case R_AARCH64_TSTBR14: + return R_PLT_PC; + case R_AARCH64_PREL16: + case R_AARCH64_PREL32: + case R_AARCH64_PREL64: + case R_AARCH64_ADR_PREL_LO21: + return R_PC; + case R_AARCH64_ADR_PREL_PG_HI21: + return R_PAGE_PC; + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + return R_GOT; + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + return R_GOT_PAGE_PC; + } +} + +RelExpr AArch64TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const { + if (Expr == R_RELAX_TLS_GD_TO_IE) { + if (Type == R_AARCH64_TLSDESC_ADR_PAGE21) + return R_RELAX_TLS_GD_TO_IE_PAGE_PC; + return R_RELAX_TLS_GD_TO_IE_ABS; + } + return Expr; +} + +bool AArch64TargetInfo::usesOnlyLowPageBits(uint32_t Type) const { + switch (Type) { + default: + return false; + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_TLSDESC_ADD_LO12_NC: + case R_AARCH64_TLSDESC_LD64_LO12_NC: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + return true; + } +} + +bool AArch64TargetInfo::isTlsInitialExecRel(uint32_t Type) const 
{ + return Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 || + Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; +} + +bool AArch64TargetInfo::isPicRel(uint32_t Type) const { + return Type == R_AARCH64_ABS32 || Type == R_AARCH64_ABS64; +} + +void AArch64TargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { + write64le(Buf, In<ELF64LE>::Plt->getVA()); +} + +// Page(Expr) is the page address of the expression Expr, defined +// as (Expr & ~0xFFF). (This applies even if the machine page size +// supported by the platform has a different value.) +uint64_t getAArch64Page(uint64_t Expr) { + return Expr & (~static_cast<uint64_t>(0xFFF)); +} + +void AArch64TargetInfo::writePltHeader(uint8_t *Buf) const { + const uint8_t PltData[] = { + 0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]! + 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[2])) + 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.plt.got[2]))] + 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.plt.got[2])) + 0x20, 0x02, 0x1f, 0xd6, // br x17 + 0x1f, 0x20, 0x03, 0xd5, // nop + 0x1f, 0x20, 0x03, 0xd5, // nop + 0x1f, 0x20, 0x03, 0xd5 // nop + }; + memcpy(Buf, PltData, sizeof(PltData)); + + uint64_t Got = In<ELF64LE>::GotPlt->getVA(); + uint64_t Plt = In<ELF64LE>::Plt->getVA(); + relocateOne(Buf + 4, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(Got + 16) - getAArch64Page(Plt + 4)); + relocateOne(Buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, Got + 16); + relocateOne(Buf + 12, R_AARCH64_ADD_ABS_LO12_NC, Got + 16); +} + +void AArch64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const uint8_t Inst[] = { + 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[n])) + 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.plt.got[n]))] + 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.plt.got[n])) + 0x20, 0x02, 0x1f, 0xd6 // br x17 + }; + memcpy(Buf, Inst, sizeof(Inst)); + + relocateOne(Buf, R_AARCH64_ADR_PREL_PG_HI21, + 
getAArch64Page(GotEntryAddr) - getAArch64Page(PltEntryAddr)); + relocateOne(Buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, GotEntryAddr); + relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotEntryAddr); +} + +static void write32AArch64Addr(uint8_t *L, uint64_t Imm) { + uint32_t ImmLo = (Imm & 0x3) << 29; + uint32_t ImmHi = (Imm & 0x1FFFFC) << 3; + uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3); + write32le(L, (read32le(L) & ~Mask) | ImmLo | ImmHi); +} + +// Return the bits [Start, End] from Val shifted Start bits. +// For instance, getBits(0xF0, 4, 8) returns 0xF. +static uint64_t getBits(uint64_t Val, int Start, int End) { + uint64_t Mask = ((uint64_t)1 << (End + 1 - Start)) - 1; + return (Val >> Start) & Mask; +} + +// Update the immediate field in a AARCH64 ldr, str, and add instruction. +static void or32AArch64Imm(uint8_t *L, uint64_t Imm) { + or32le(L, (Imm & 0xFFF) << 10); +} + +void AArch64TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + switch (Type) { + case R_AARCH64_ABS16: + case R_AARCH64_PREL16: + checkIntUInt<16>(Loc, Val, Type); + write16le(Loc, Val); + break; + case R_AARCH64_ABS32: + case R_AARCH64_PREL32: + checkIntUInt<32>(Loc, Val, Type); + write32le(Loc, Val); + break; + case R_AARCH64_ABS64: + case R_AARCH64_GLOB_DAT: + case R_AARCH64_PREL64: + write64le(Loc, Val); + break; + case R_AARCH64_ADD_ABS_LO12_NC: + or32AArch64Imm(Loc, Val); + break; + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + case R_AARCH64_TLSDESC_ADR_PAGE21: + checkInt<33>(Loc, Val, Type); + write32AArch64Addr(Loc, Val >> 12); + break; + case R_AARCH64_ADR_PREL_LO21: + checkInt<21>(Loc, Val, Type); + write32AArch64Addr(Loc, Val); + break; + case R_AARCH64_CALL26: + case R_AARCH64_JUMP26: + checkInt<28>(Loc, Val, Type); + or32le(Loc, (Val & 0x0FFFFFFC) >> 2); + break; + case R_AARCH64_CONDBR19: + checkInt<21>(Loc, Val, Type); + or32le(Loc, (Val & 0x1FFFFC) << 3); + break; + case 
R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + case R_AARCH64_TLSDESC_LD64_LO12_NC: + checkAlignment<8>(Loc, Val, Type); + or32le(Loc, (Val & 0xFF8) << 7); + break; + case R_AARCH64_LDST8_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 0, 11)); + break; + case R_AARCH64_LDST16_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 1, 11)); + break; + case R_AARCH64_LDST32_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 2, 11)); + break; + case R_AARCH64_LDST64_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 3, 11)); + break; + case R_AARCH64_LDST128_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 4, 11)); + break; + case R_AARCH64_MOVW_UABS_G0_NC: + or32le(Loc, (Val & 0xFFFF) << 5); + break; + case R_AARCH64_MOVW_UABS_G1_NC: + or32le(Loc, (Val & 0xFFFF0000) >> 11); + break; + case R_AARCH64_MOVW_UABS_G2_NC: + or32le(Loc, (Val & 0xFFFF00000000) >> 27); + break; + case R_AARCH64_MOVW_UABS_G3: + or32le(Loc, (Val & 0xFFFF000000000000) >> 43); + break; + case R_AARCH64_TSTBR14: + checkInt<16>(Loc, Val, Type); + or32le(Loc, (Val & 0xFFFC) << 3); + break; + case R_AARCH64_TLSLE_ADD_TPREL_HI12: + checkInt<24>(Loc, Val, Type); + or32AArch64Imm(Loc, Val >> 12); + break; + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + case R_AARCH64_TLSDESC_ADD_LO12_NC: + or32AArch64Imm(Loc, Val); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +void AArch64TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // TLSDESC Global-Dynamic relocation are in the form: + // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] + // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12_NC] + // add x0, x0, :tlsdesc_los:v [_AARCH64_TLSDESC_ADD_LO12_NC] + // .tlsdesccall [R_AARCH64_TLSDESC_CALL] + // blr x1 + // And it can optimized to: + // movz x0, #0x0, lsl #16 + // movk x0, #0x10 + // nop + // nop + checkUInt<32>(Loc, Val, Type); + + switch (Type) { + case R_AARCH64_TLSDESC_ADD_LO12_NC: + case 
R_AARCH64_TLSDESC_CALL: + write32le(Loc, 0xd503201f); // nop + return; + case R_AARCH64_TLSDESC_ADR_PAGE21: + write32le(Loc, 0xd2a00000 | (((Val >> 16) & 0xffff) << 5)); // movz + return; + case R_AARCH64_TLSDESC_LD64_LO12_NC: + write32le(Loc, 0xf2800000 | ((Val & 0xffff) << 5)); // movk + return; + default: + llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + } +} + +void AArch64TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // TLSDESC Global-Dynamic relocation are in the form: + // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] + // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12_NC] + // add x0, x0, :tlsdesc_los:v [_AARCH64_TLSDESC_ADD_LO12_NC] + // .tlsdesccall [R_AARCH64_TLSDESC_CALL] + // blr x1 + // And it can optimized to: + // adrp x0, :gottprel:v + // ldr x0, [x0, :gottprel_lo12:v] + // nop + // nop + + switch (Type) { + case R_AARCH64_TLSDESC_ADD_LO12_NC: + case R_AARCH64_TLSDESC_CALL: + write32le(Loc, 0xd503201f); // nop + break; + case R_AARCH64_TLSDESC_ADR_PAGE21: + write32le(Loc, 0x90000000); // adrp + relocateOne(Loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, Val); + break; + case R_AARCH64_TLSDESC_LD64_LO12_NC: + write32le(Loc, 0xf9400000); // ldr + relocateOne(Loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, Val); + break; + default: + llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + } +} + +void AArch64TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + checkUInt<32>(Loc, Val, Type); + + if (Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { + // Generate MOVZ. + uint32_t RegNo = read32le(Loc) & 0x1f; + write32le(Loc, (0xd2a00000 | RegNo) | (((Val >> 16) & 0xffff) << 5)); + return; + } + if (Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { + // Generate MOVK. 
+ uint32_t RegNo = read32le(Loc) & 0x1f; + write32le(Loc, (0xf2800000 | RegNo) | ((Val & 0xffff) << 5)); + return; + } + llvm_unreachable("invalid relocation for TLS IE to LE relaxation"); +} + +AMDGPUTargetInfo::AMDGPUTargetInfo() { + RelativeRel = R_AMDGPU_REL64; + GotRel = R_AMDGPU_ABS64; + GotEntrySize = 8; +} + +void AMDGPUTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + switch (Type) { + case R_AMDGPU_ABS32: + case R_AMDGPU_GOTPCREL: + case R_AMDGPU_GOTPCREL32_LO: + case R_AMDGPU_REL32: + case R_AMDGPU_REL32_LO: + write32le(Loc, Val); + break; + case R_AMDGPU_ABS64: + write64le(Loc, Val); + break; + case R_AMDGPU_GOTPCREL32_HI: + case R_AMDGPU_REL32_HI: + write32le(Loc, Val >> 32); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +RelExpr AMDGPUTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { + switch (Type) { + case R_AMDGPU_ABS32: + case R_AMDGPU_ABS64: + return R_ABS; + case R_AMDGPU_REL32: + case R_AMDGPU_REL32_LO: + case R_AMDGPU_REL32_HI: + return R_PC; + case R_AMDGPU_GOTPCREL: + case R_AMDGPU_GOTPCREL32_LO: + case R_AMDGPU_GOTPCREL32_HI: + return R_GOT_PC; + default: + fatal("do not know how to handle relocation " + Twine(Type)); + } +} + +ARMTargetInfo::ARMTargetInfo() { + CopyRel = R_ARM_COPY; + RelativeRel = R_ARM_RELATIVE; + IRelativeRel = R_ARM_IRELATIVE; + GotRel = R_ARM_GLOB_DAT; + PltRel = R_ARM_JUMP_SLOT; + TlsGotRel = R_ARM_TLS_TPOFF32; + TlsModuleIndexRel = R_ARM_TLS_DTPMOD32; + TlsOffsetRel = R_ARM_TLS_DTPOFF32; + GotEntrySize = 4; + GotPltEntrySize = 4; + PltEntrySize = 16; + PltHeaderSize = 20; + // ARM uses Variant 1 TLS + TcbSize = 8; + NeedsThunks = true; +} + +RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_ARM_THM_JUMP11: + return R_PC; + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_PREL31: + case 
R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + case R_ARM_THM_CALL: + return R_PLT_PC; + case R_ARM_GOTOFF32: + // (S + A) - GOT_ORG + return R_GOTREL; + case R_ARM_GOT_BREL: + // GOT(S) + A - GOT_ORG + return R_GOT_OFF; + case R_ARM_GOT_PREL: + case R_ARM_TLS_IE32: + // GOT(S) + A - P + return R_GOT_PC; + case R_ARM_TARGET1: + return Config->Target1Rel ? R_PC : R_ABS; + case R_ARM_TARGET2: + if (Config->Target2 == Target2Policy::Rel) + return R_PC; + if (Config->Target2 == Target2Policy::Abs) + return R_ABS; + return R_GOT_PC; + case R_ARM_TLS_GD32: + return R_TLSGD_PC; + case R_ARM_TLS_LDM32: + return R_TLSLD_PC; + case R_ARM_BASE_PREL: + // B(S) + A - P + // FIXME: currently B(S) assumed to be .got, this may not hold for all + // platforms. + return R_GOTONLY_PC; + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVT_PREL: + case R_ARM_REL32: + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVT_PREL: + return R_PC; + case R_ARM_NONE: + return R_HINT; + case R_ARM_TLS_LE32: + return R_TLS; + } +} + +bool ARMTargetInfo::isPicRel(uint32_t Type) const { + return (Type == R_ARM_TARGET1 && !Config->Target1Rel) || + (Type == R_ARM_ABS32); +} + +uint32_t ARMTargetInfo::getDynRel(uint32_t Type) const { + if (Type == R_ARM_TARGET1 && !Config->Target1Rel) + return R_ARM_ABS32; + if (Type == R_ARM_ABS32) + return Type; + // Keep it going with a dummy value so that we can find more reloc errors. + return R_ARM_ABS32; +} + +void ARMTargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { + write32le(Buf, In<ELF32LE>::Plt->getVA()); +} + +void ARMTargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { + // An ARM entry is the address of the ifunc resolver function. + write32le(Buf, S.getVA<ELF32LE>()); +} + +void ARMTargetInfo::writePltHeader(uint8_t *Buf) const { + const uint8_t PltData[] = { + 0x04, 0xe0, 0x2d, 0xe5, // str lr, [sp,#-4]! 
+ 0x04, 0xe0, 0x9f, 0xe5, // ldr lr, L2 + 0x0e, 0xe0, 0x8f, 0xe0, // L1: add lr, pc, lr + 0x08, 0xf0, 0xbe, 0xe5, // ldr pc, [lr, #8] + 0x00, 0x00, 0x00, 0x00, // L2: .word &(.got.plt) - L1 - 8 + }; + memcpy(Buf, PltData, sizeof(PltData)); + uint64_t GotPlt = In<ELF32LE>::GotPlt->getVA(); + uint64_t L1 = In<ELF32LE>::Plt->getVA() + 8; + write32le(Buf + 16, GotPlt - L1 - 8); +} + +void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + // FIXME: Using simple code sequence with simple relocations. + // There is a more optimal sequence but it requires support for the group + // relocations. See ELF for the ARM Architecture Appendix A.3 + const uint8_t PltData[] = { + 0x04, 0xc0, 0x9f, 0xe5, // ldr ip, L2 + 0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc + 0x00, 0xf0, 0x9c, 0xe5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00, // L2: .word Offset(&(.plt.got) - L1 - 8 + }; + memcpy(Buf, PltData, sizeof(PltData)); + uint64_t L1 = PltEntryAddr + 4; + write32le(Buf + 12, GotEntryAddr - L1 - 8); +} + +RelExpr ARMTargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, + const InputFile &File, + const SymbolBody &S) const { + // If S is an undefined weak symbol in an executable we don't need a Thunk. + // In a DSO calls to undefined symbols, including weak ones get PLT entries + // which may need a thunk. + if (S.isUndefined() && !S.isLocal() && S.symbol()->isWeak() + && !Config->Shared) + return Expr; + // A state change from ARM to Thumb and vice versa must go through an + // interworking thunk if the relocation type is not R_ARM_CALL or + // R_ARM_THM_CALL. + switch (RelocType) { + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_JUMP24: + // Source is ARM, all PLT entries are ARM so no interworking required. + // Otherwise we need to interwork if Symbol has bit 0 set (Thumb). 
+ if (Expr == R_PC && ((S.getVA<ELF32LE>() & 1) == 1)) + return R_THUNK_PC; + break; + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + // Source is Thumb, all PLT entries are ARM so interworking is required. + // Otherwise we need to interwork if Symbol has bit 0 clear (ARM). + if (Expr == R_PLT_PC) + return R_THUNK_PLT_PC; + if ((S.getVA<ELF32LE>() & 1) == 0) + return R_THUNK_PC; + break; + } + return Expr; +} + +void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + switch (Type) { + case R_ARM_ABS32: + case R_ARM_BASE_PREL: + case R_ARM_GLOB_DAT: + case R_ARM_GOTOFF32: + case R_ARM_GOT_BREL: + case R_ARM_GOT_PREL: + case R_ARM_REL32: + case R_ARM_RELATIVE: + case R_ARM_TARGET1: + case R_ARM_TARGET2: + case R_ARM_TLS_GD32: + case R_ARM_TLS_IE32: + case R_ARM_TLS_LDM32: + case R_ARM_TLS_LDO32: + case R_ARM_TLS_LE32: + case R_ARM_TLS_TPOFF32: + write32le(Loc, Val); + break; + case R_ARM_TLS_DTPMOD32: + write32le(Loc, 1); + break; + case R_ARM_PREL31: + checkInt<31>(Loc, Val, Type); + write32le(Loc, (read32le(Loc) & 0x80000000) | (Val & ~0x80000000)); + break; + case R_ARM_CALL: + // R_ARM_CALL is used for BL and BLX instructions, depending on the + // value of bit 0 of Val, we must select a BL or BLX instruction + if (Val & 1) { + // If bit 0 of Val is 1 the target is Thumb, we must select a BLX. + // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' + checkInt<26>(Loc, Val, Type); + write32le(Loc, 0xfa000000 | // opcode + ((Val & 2) << 23) | // H + ((Val >> 2) & 0x00ffffff)); // imm24 + break; + } + if ((read32le(Loc) & 0xfe000000) == 0xfa000000) + // BLX (always unconditional) instruction to an ARM Target, select an + // unconditional BL. 
+ write32le(Loc, 0xeb000000 | (read32le(Loc) & 0x00ffffff)); + // fall through as BL encoding is shared with B + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + checkInt<26>(Loc, Val, Type); + write32le(Loc, (read32le(Loc) & ~0x00ffffff) | ((Val >> 2) & 0x00ffffff)); + break; + case R_ARM_THM_JUMP11: + checkInt<12>(Loc, Val, Type); + // Only a 16-bit halfword is patched here, so read just that halfword + // (as getImplicitAddend does) instead of 32 bits past Loc. + write16le(Loc, (read16le(Loc) & 0xf800) | ((Val >> 1) & 0x07ff)); + break; + case R_ARM_THM_JUMP19: + // Encoding T3: Val = S:J2:J1:imm6:imm11:0 + checkInt<21>(Loc, Val, Type); + write16le(Loc, + (read16le(Loc) & 0xfbc0) | // opcode cond + ((Val >> 10) & 0x0400) | // S + ((Val >> 12) & 0x003f)); // imm6 + write16le(Loc + 2, + 0x8000 | // opcode + ((Val >> 8) & 0x0800) | // J2 + ((Val >> 5) & 0x2000) | // J1 + ((Val >> 1) & 0x07ff)); // imm11 + break; + case R_ARM_THM_CALL: + // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the + // value of bit 0 of Val, we must select a BL or BLX instruction + if ((Val & 1) == 0) { + // Ensure BLX destination is 4-byte aligned. As BLX instruction may + // only be two byte aligned. 
This must be done before overflow check + Val = alignTo(Val, 4); + } + // Bit 12 is 0 for BLX, 1 for BL + write16le(Loc + 2, (read16le(Loc + 2) & ~0x1000) | (Val & 1) << 12); + // Fall through as rest of encoding is the same as B.W + case R_ARM_THM_JUMP24: + // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 + // FIXME: Use of I1 and I2 require v6T2ops + checkInt<25>(Loc, Val, Type); + write16le(Loc, + 0xf000 | // opcode + ((Val >> 14) & 0x0400) | // S + ((Val >> 12) & 0x03ff)); // imm10 + write16le(Loc + 2, + (read16le(Loc + 2) & 0xd000) | // opcode + (((~(Val >> 10)) ^ (Val >> 11)) & 0x2000) | // J1 + (((~(Val >> 11)) ^ (Val >> 13)) & 0x0800) | // J2 + ((Val >> 1) & 0x07ff)); // imm11 + break; + case R_ARM_MOVW_ABS_NC: + case R_ARM_MOVW_PREL_NC: + write32le(Loc, (read32le(Loc) & ~0x000f0fff) | ((Val & 0xf000) << 4) | + (Val & 0x0fff)); + break; + case R_ARM_MOVT_ABS: + case R_ARM_MOVT_PREL: + checkInt<32>(Loc, Val, Type); + write32le(Loc, (read32le(Loc) & ~0x000f0fff) | + (((Val >> 16) & 0xf000) << 4) | ((Val >> 16) & 0xfff)); + break; + case R_ARM_THM_MOVT_ABS: + case R_ARM_THM_MOVT_PREL: + // Encoding T1: A = imm4:i:imm3:imm8 + checkInt<32>(Loc, Val, Type); + write16le(Loc, + 0xf2c0 | // opcode + ((Val >> 17) & 0x0400) | // i + ((Val >> 28) & 0x000f)); // imm4 + write16le(Loc + 2, + (read16le(Loc + 2) & 0x8f00) | // opcode + ((Val >> 12) & 0x7000) | // imm3 + ((Val >> 16) & 0x00ff)); // imm8 + break; + case R_ARM_THM_MOVW_ABS_NC: + case R_ARM_THM_MOVW_PREL_NC: + // Encoding T3: A = imm4:i:imm3:imm8 + write16le(Loc, + 0xf240 | // opcode + ((Val >> 1) & 0x0400) | // i + ((Val >> 12) & 0x000f)); // imm4 + write16le(Loc + 2, + (read16le(Loc + 2) & 0x8f00) | // opcode + ((Val << 4) & 0x7000) | // imm3 + (Val & 0x00ff)); // imm8 + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +uint64_t ARMTargetInfo::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { + switch (Type) { + default: + return 0; + 
case R_ARM_ABS32: + case R_ARM_BASE_PREL: + case R_ARM_GOTOFF32: + case R_ARM_GOT_BREL: + case R_ARM_GOT_PREL: + case R_ARM_REL32: + case R_ARM_TARGET1: + case R_ARM_TARGET2: + case R_ARM_TLS_GD32: + case R_ARM_TLS_LDM32: + case R_ARM_TLS_LDO32: + case R_ARM_TLS_IE32: + case R_ARM_TLS_LE32: + return SignExtend64<32>(read32le(Buf)); + case R_ARM_PREL31: + return SignExtend64<31>(read32le(Buf)); + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + return SignExtend64<26>(read32le(Buf) << 2); + case R_ARM_THM_JUMP11: + return SignExtend64<12>(read16le(Buf) << 1); + case R_ARM_THM_JUMP19: { + // Encoding T3: A = S:J2:J1:imm6:imm11:0 + uint16_t Hi = read16le(Buf); + uint16_t Lo = read16le(Buf + 2); + return SignExtend64<20>(((Hi & 0x0400) << 10) | // S + ((Lo & 0x0800) << 8) | // J2 + ((Lo & 0x2000) << 5) | // J1 + ((Hi & 0x003f) << 12) | // imm6 + ((Lo & 0x07ff) << 1)); // imm11:0 + } + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: { + // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0 + // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S) + // FIXME: I1 and I2 require v6T2ops + uint16_t Hi = read16le(Buf); + uint16_t Lo = read16le(Buf + 2); + return SignExtend64<24>(((Hi & 0x0400) << 14) | // S + (~((Lo ^ (Hi << 3)) << 10) & 0x00800000) | // I1 + (~((Lo ^ (Hi << 1)) << 11) & 0x00400000) | // I2 + ((Hi & 0x003ff) << 12) | // imm10 + ((Lo & 0x007ff) << 1)); // imm11:0 + } + // ELF for the ARM Architecture 4.6.1.1 the implicit addend for MOVW and + // MOVT is in the range -32768 <= A < 32768 + case R_ARM_MOVW_ABS_NC: + case R_ARM_MOVT_ABS: + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVT_PREL: { + uint64_t Val = read32le(Buf) & 0x000f0fff; + return SignExtend64<16>(((Val & 0x000f0000) >> 4) | (Val & 0x00fff)); + } + case R_ARM_THM_MOVW_ABS_NC: + case R_ARM_THM_MOVT_ABS: + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVT_PREL: { + // Encoding T3: A = imm4:i:imm3:imm8 + uint16_t Hi = read16le(Buf); + uint16_t Lo = read16le(Buf + 2); + return 
SignExtend64<16>(((Hi & 0x000f) << 12) | // imm4 + ((Hi & 0x0400) << 1) | // i + ((Lo & 0x7000) >> 4) | // imm3 + (Lo & 0x00ff)); // imm8 + } + } +} + +bool ARMTargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { + return Type == R_ARM_TLS_LDO32 || Type == R_ARM_TLS_LDM32; +} + +bool ARMTargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { + return Type == R_ARM_TLS_GD32; +} + +bool ARMTargetInfo::isTlsInitialExecRel(uint32_t Type) const { + return Type == R_ARM_TLS_IE32; +} + +template <class ELFT> MipsTargetInfo<ELFT>::MipsTargetInfo() { + GotPltHeaderEntriesNum = 2; + DefaultMaxPageSize = 65536; + GotEntrySize = sizeof(typename ELFT::uint); + GotPltEntrySize = sizeof(typename ELFT::uint); + PltEntrySize = 16; + PltHeaderSize = 32; + CopyRel = R_MIPS_COPY; + PltRel = R_MIPS_JUMP_SLOT; + NeedsThunks = true; + if (ELFT::Is64Bits) { + RelativeRel = (R_MIPS_64 << 8) | R_MIPS_REL32; + TlsGotRel = R_MIPS_TLS_TPREL64; + TlsModuleIndexRel = R_MIPS_TLS_DTPMOD64; + TlsOffsetRel = R_MIPS_TLS_DTPREL64; + } else { + RelativeRel = R_MIPS_REL32; + TlsGotRel = R_MIPS_TLS_TPREL32; + TlsModuleIndexRel = R_MIPS_TLS_DTPMOD32; + TlsOffsetRel = R_MIPS_TLS_DTPREL32; + } +} + +template <class ELFT> +RelExpr MipsTargetInfo<ELFT>::getRelExpr(uint32_t Type, + const SymbolBody &S) const { + // See comment in the calculateMipsRelChain. + if (ELFT::Is64Bits || Config->MipsN32Abi) + Type &= 0xff; + switch (Type) { + default: + return R_ABS; + case R_MIPS_JALR: + return R_HINT; + case R_MIPS_GPREL16: + case R_MIPS_GPREL32: + return R_MIPS_GOTREL; + case R_MIPS_26: + return R_PLT; + case R_MIPS_HI16: + case R_MIPS_LO16: + case R_MIPS_GOT_OFST: + // R_MIPS_HI16/R_MIPS_LO16 relocations against _gp_disp calculate + // offset between start of function and 'gp' value which by default + // equal to the start of .got section. In that case we consider these + // relocations as relative. 
+ if (&S == ElfSym<ELFT>::MipsGpDisp) + return R_PC; + return R_ABS; + case R_MIPS_PC32: + case R_MIPS_PC16: + case R_MIPS_PC19_S2: + case R_MIPS_PC21_S2: + case R_MIPS_PC26_S2: + case R_MIPS_PCHI16: + case R_MIPS_PCLO16: + return R_PC; + case R_MIPS_GOT16: + if (S.isLocal()) + return R_MIPS_GOT_LOCAL_PAGE; + // fallthrough + case R_MIPS_CALL16: + case R_MIPS_GOT_DISP: + case R_MIPS_TLS_GOTTPREL: + return R_MIPS_GOT_OFF; + case R_MIPS_CALL_HI16: + case R_MIPS_CALL_LO16: + case R_MIPS_GOT_HI16: + case R_MIPS_GOT_LO16: + return R_MIPS_GOT_OFF32; + case R_MIPS_GOT_PAGE: + return R_MIPS_GOT_LOCAL_PAGE; + case R_MIPS_TLS_GD: + return R_MIPS_TLSGD; + case R_MIPS_TLS_LDM: + return R_MIPS_TLSLD; + } +} + +template <class ELFT> bool MipsTargetInfo<ELFT>::isPicRel(uint32_t Type) const { + return Type == R_MIPS_32 || Type == R_MIPS_64; +} + +template <class ELFT> +uint32_t MipsTargetInfo<ELFT>::getDynRel(uint32_t Type) const { + return RelativeRel; +} + +template <class ELFT> +bool MipsTargetInfo<ELFT>::isTlsLocalDynamicRel(uint32_t Type) const { + return Type == R_MIPS_TLS_LDM; +} + +template <class ELFT> +bool MipsTargetInfo<ELFT>::isTlsGlobalDynamicRel(uint32_t Type) const { + return Type == R_MIPS_TLS_GD; +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { + write32<ELFT::TargetEndianness>(Buf, In<ELFT>::Plt->getVA()); +} + +template <endianness E, uint8_t BSIZE, uint8_t SHIFT> +static int64_t getPcRelocAddend(const uint8_t *Loc) { + uint32_t Instr = read32<E>(Loc); + uint32_t Mask = 0xffffffff >> (32 - BSIZE); + return SignExtend64<BSIZE + SHIFT>((Instr & Mask) << SHIFT); +} + +template <endianness E, uint8_t BSIZE, uint8_t SHIFT> +static void applyMipsPcReloc(uint8_t *Loc, uint32_t Type, uint64_t V) { + uint32_t Mask = 0xffffffff >> (32 - BSIZE); + uint32_t Instr = read32<E>(Loc); + if (SHIFT > 0) + checkAlignment<(1 << SHIFT)>(Loc, V, Type); + checkInt<BSIZE + SHIFT>(Loc, V, Type); + write32<E>(Loc, (Instr & 
~Mask) | ((V >> SHIFT) & Mask)); +} + +template <endianness E> static void writeMipsHi16(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + uint16_t Res = ((V + 0x8000) >> 16) & 0xffff; + write32<E>(Loc, (Instr & 0xffff0000) | Res); +} + +template <endianness E> static void writeMipsHigher(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + uint16_t Res = ((V + 0x80008000) >> 32) & 0xffff; + write32<E>(Loc, (Instr & 0xffff0000) | Res); +} + +template <endianness E> static void writeMipsHighest(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + uint16_t Res = ((V + 0x800080008000) >> 48) & 0xffff; + write32<E>(Loc, (Instr & 0xffff0000) | Res); +} + +template <endianness E> static void writeMipsLo16(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + write32<E>(Loc, (Instr & 0xffff0000) | (V & 0xffff)); +} + +template <class ELFT> static bool isMipsR6() { + const auto &FirstObj = cast<ELFFileBase<ELFT>>(*Config->FirstElf); + uint32_t Arch = FirstObj.getObj().getHeader()->e_flags & EF_MIPS_ARCH; + return Arch == EF_MIPS_ARCH_32R6 || Arch == EF_MIPS_ARCH_64R6; +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const { + const endianness E = ELFT::TargetEndianness; + if (Config->MipsN32Abi) { + write32<E>(Buf, 0x3c0e0000); // lui $14, %hi(&GOTPLT[0]) + write32<E>(Buf + 4, 0x8dd90000); // lw $25, %lo(&GOTPLT[0])($14) + write32<E>(Buf + 8, 0x25ce0000); // addiu $14, $14, %lo(&GOTPLT[0]) + write32<E>(Buf + 12, 0x030ec023); // subu $24, $24, $14 + } else { + write32<E>(Buf, 0x3c1c0000); // lui $28, %hi(&GOTPLT[0]) + write32<E>(Buf + 4, 0x8f990000); // lw $25, %lo(&GOTPLT[0])($28) + write32<E>(Buf + 8, 0x279c0000); // addiu $28, $28, %lo(&GOTPLT[0]) + write32<E>(Buf + 12, 0x031cc023); // subu $24, $24, $28 + } + write32<E>(Buf + 16, 0x03e07825); // move $15, $31 + write32<E>(Buf + 20, 0x0018c082); // srl $24, $24, 2 + write32<E>(Buf + 24, 0x0320f809); // jalr $25 + write32<E>(Buf + 28, 
0x2718fffe); // subu $24, $24, 2 + uint64_t Got = In<ELFT>::GotPlt->getVA(); + writeMipsHi16<E>(Buf, Got); + writeMipsLo16<E>(Buf + 4, Got); + writeMipsLo16<E>(Buf + 8, Got); +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const endianness E = ELFT::TargetEndianness; + write32<E>(Buf, 0x3c0f0000); // lui $15, %hi(.got.plt entry) + write32<E>(Buf + 4, 0x8df90000); // l[wd] $25, %lo(.got.plt entry)($15) + // jr $25 + write32<E>(Buf + 8, isMipsR6<ELFT>() ? 0x03200009 : 0x03200008); + write32<E>(Buf + 12, 0x25f80000); // addiu $24, $15, %lo(.got.plt entry) + writeMipsHi16<E>(Buf, GotEntryAddr); + writeMipsLo16<E>(Buf + 4, GotEntryAddr); + writeMipsLo16<E>(Buf + 12, GotEntryAddr); +} + +template <class ELFT> +RelExpr MipsTargetInfo<ELFT>::getThunkExpr(RelExpr Expr, uint32_t Type, + const InputFile &File, + const SymbolBody &S) const { + // Any MIPS PIC code function is invoked with its address in register $t9. + // So if we have a branch instruction from non-PIC code to the PIC one + // we cannot make the jump directly and need to create a small stubs + // to save the target function address. + // See page 3-38 ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (Type != R_MIPS_26) + return Expr; + auto *F = dyn_cast<ELFFileBase<ELFT>>(&File); + if (!F) + return Expr; + // If current file has PIC code, LA25 stub is not required. + if (F->getObj().getHeader()->e_flags & EF_MIPS_PIC) + return Expr; + auto *D = dyn_cast<DefinedRegular<ELFT>>(&S); + // LA25 is required if target file has PIC code + // or target symbol is a PIC symbol. + return D && D->isMipsPIC() ? 
R_THUNK_ABS : Expr; +} + +template <class ELFT> +uint64_t MipsTargetInfo<ELFT>::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { + const endianness E = ELFT::TargetEndianness; + switch (Type) { + default: + return 0; + case R_MIPS_32: + case R_MIPS_GPREL32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL32: + return read32<E>(Buf); + case R_MIPS_26: + // FIXME (simon): If the relocation target symbol is not a PLT entry + // we should use another expression for calculation: + // ((A << 2) | (P & 0xf0000000)) >> 2 + return SignExtend64<28>((read32<E>(Buf) & 0x3ffffff) << 2); + case R_MIPS_GPREL16: + case R_MIPS_LO16: + case R_MIPS_PCLO16: + case R_MIPS_TLS_DTPREL_HI16: + case R_MIPS_TLS_DTPREL_LO16: + case R_MIPS_TLS_TPREL_HI16: + case R_MIPS_TLS_TPREL_LO16: + return SignExtend64<16>(read32<E>(Buf)); + case R_MIPS_PC16: + return getPcRelocAddend<E, 16, 2>(Buf); + case R_MIPS_PC19_S2: + return getPcRelocAddend<E, 19, 2>(Buf); + case R_MIPS_PC21_S2: + return getPcRelocAddend<E, 21, 2>(Buf); + case R_MIPS_PC26_S2: + return getPcRelocAddend<E, 26, 2>(Buf); + case R_MIPS_PC32: + return getPcRelocAddend<E, 32, 0>(Buf); + } +} + +static std::pair<uint32_t, uint64_t> +calculateMipsRelChain(uint8_t *Loc, uint32_t Type, uint64_t Val) { + // MIPS N64 ABI packs multiple relocations into the single relocation + // record. In general, all up to three relocations can have arbitrary + // types. In fact, Clang and GCC uses only a few combinations. For now, + // we support two of them. That is allow to pass at least all LLVM + // test suite cases. + // <any relocation> / R_MIPS_SUB / R_MIPS_HI16 | R_MIPS_LO16 + // <any relocation> / R_MIPS_64 / R_MIPS_NONE + // The first relocation is a 'real' relocation which is calculated + // using the corresponding symbol's value. The second and the third + // relocations used to modify result of the first one: extend it to + // 64-bit, extract high or low part etc. 
For details, see part 2.9 Relocation + // at the https://dmz-portal.mips.com/mw/images/8/82/007-4658-001.pdf + uint32_t Type2 = (Type >> 8) & 0xff; + uint32_t Type3 = (Type >> 16) & 0xff; + if (Type2 == R_MIPS_NONE && Type3 == R_MIPS_NONE) + return std::make_pair(Type, Val); + if (Type2 == R_MIPS_64 && Type3 == R_MIPS_NONE) + return std::make_pair(Type2, Val); + if (Type2 == R_MIPS_SUB && (Type3 == R_MIPS_HI16 || Type3 == R_MIPS_LO16)) + return std::make_pair(Type3, -Val); + error(getErrorLocation(Loc) + "unsupported relocations combination " + + Twine(Type)); + return std::make_pair(Type & 0xff, Val); +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + const endianness E = ELFT::TargetEndianness; + // Thread pointer and DRP offsets from the start of TLS data area. + // https://www.linux-mips.org/wiki/NPTL + if (Type == R_MIPS_TLS_DTPREL_HI16 || Type == R_MIPS_TLS_DTPREL_LO16 || + Type == R_MIPS_TLS_DTPREL32 || Type == R_MIPS_TLS_DTPREL64) + Val -= 0x8000; + else if (Type == R_MIPS_TLS_TPREL_HI16 || Type == R_MIPS_TLS_TPREL_LO16 || + Type == R_MIPS_TLS_TPREL32 || Type == R_MIPS_TLS_TPREL64) + Val -= 0x7000; + if (ELFT::Is64Bits || Config->MipsN32Abi) + std::tie(Type, Val) = calculateMipsRelChain(Loc, Type, Val); + switch (Type) { + case R_MIPS_32: + case R_MIPS_GPREL32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL32: + write32<E>(Loc, Val); + break; + case R_MIPS_64: + case R_MIPS_TLS_DTPREL64: + case R_MIPS_TLS_TPREL64: + write64<E>(Loc, Val); + break; + case R_MIPS_26: + write32<E>(Loc, (read32<E>(Loc) & ~0x3ffffff) | ((Val >> 2) & 0x3ffffff)); + break; + case R_MIPS_GOT_DISP: + case R_MIPS_GOT_PAGE: + case R_MIPS_GOT16: + case R_MIPS_GPREL16: + case R_MIPS_TLS_GD: + case R_MIPS_TLS_LDM: + checkInt<16>(Loc, Val, Type); + // fallthrough + case R_MIPS_CALL16: + case R_MIPS_CALL_LO16: + case R_MIPS_GOT_LO16: + case R_MIPS_GOT_OFST: + case R_MIPS_LO16: + case R_MIPS_PCLO16: + case 
R_MIPS_TLS_DTPREL_LO16: + case R_MIPS_TLS_GOTTPREL: + case R_MIPS_TLS_TPREL_LO16: + writeMipsLo16<E>(Loc, Val); + break; + case R_MIPS_CALL_HI16: + case R_MIPS_GOT_HI16: + case R_MIPS_HI16: + case R_MIPS_PCHI16: + case R_MIPS_TLS_DTPREL_HI16: + case R_MIPS_TLS_TPREL_HI16: + writeMipsHi16<E>(Loc, Val); + break; + case R_MIPS_HIGHER: + writeMipsHigher<E>(Loc, Val); + break; + case R_MIPS_HIGHEST: + writeMipsHighest<E>(Loc, Val); + break; + case R_MIPS_JALR: + // Ignore this optimization relocation for now + break; + case R_MIPS_PC16: + applyMipsPcReloc<E, 16, 2>(Loc, Type, Val); + break; + case R_MIPS_PC19_S2: + applyMipsPcReloc<E, 19, 2>(Loc, Type, Val); + break; + case R_MIPS_PC21_S2: + applyMipsPcReloc<E, 21, 2>(Loc, Type, Val); + break; + case R_MIPS_PC26_S2: + applyMipsPcReloc<E, 26, 2>(Loc, Type, Val); + break; + case R_MIPS_PC32: + applyMipsPcReloc<E, 32, 0>(Loc, Type, Val); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +template <class ELFT> +bool MipsTargetInfo<ELFT>::usesOnlyLowPageBits(uint32_t Type) const { + return Type == R_MIPS_LO16 || Type == R_MIPS_GOT_OFST; +} +} +} diff --git a/contrib/llvm/tools/lld/ELF/Target.h b/contrib/llvm/tools/lld/ELF/Target.h new file mode 100644 index 000000000000..752f9cd5ee4e --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Target.h @@ -0,0 +1,117 @@ +//===- Target.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_TARGET_H +#define LLD_ELF_TARGET_H + +#include "InputSection.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELF.h" + +#include <memory> + +namespace lld { +namespace elf { +class InputFile; +class SymbolBody; + +class TargetInfo { +public: + virtual bool isTlsInitialExecRel(uint32_t Type) const; + virtual bool isTlsLocalDynamicRel(uint32_t Type) const; + virtual bool isTlsGlobalDynamicRel(uint32_t Type) const; + virtual bool isPicRel(uint32_t Type) const { return true; } + virtual uint32_t getDynRel(uint32_t Type) const { return Type; } + virtual void writeGotPltHeader(uint8_t *Buf) const {} + virtual void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {}; + virtual void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const; + virtual uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const; + + // If lazy binding is supported, the first entry of the PLT has code + // to call the dynamic linker to resolve PLT entries the first time + // they are called. This function writes that code. + virtual void writePltHeader(uint8_t *Buf) const {} + + virtual void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const {} + + // Returns true if a relocation only uses the low bits of a value such that + // all those bits are in the same page. For example, if the relocation + // only uses the low 12 bits in a system with 4k pages. If this is true, the + // bits will always have the same value at runtime and we don't have to emit + // a dynamic relocation. + virtual bool usesOnlyLowPageBits(uint32_t Type) const; + + // Decide whether a Thunk is needed for the relocation from File + // targeting S. 
Returns one of: + // Expr if there is no Thunk required + // R_THUNK_ABS if thunk is required and expression is absolute + // R_THUNK_PC if thunk is required and expression is pc rel + // R_THUNK_PLT_PC if thunk is required to PLT entry and expression is pc rel + virtual RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, + const InputFile &File, + const SymbolBody &S) const; + virtual RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const = 0; + virtual void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const = 0; + virtual ~TargetInfo(); + + unsigned TlsGdRelaxSkip = 1; + unsigned PageSize = 4096; + unsigned DefaultMaxPageSize = 4096; + + // On FreeBSD x86_64 the first page cannot be mmaped. + // On Linux that is controlled by vm.mmap_min_addr. At least on some x86_64 + // installs that is 65536, so the first 15 pages cannot be used. + // Given that, the smallest value that can be used in here is 0x10000. + uint64_t DefaultImageBase = 0x10000; + + uint32_t CopyRel; + uint32_t GotRel; + uint32_t PltRel; + uint32_t RelativeRel; + uint32_t IRelativeRel; + uint32_t TlsDescRel; + uint32_t TlsGotRel; + uint32_t TlsModuleIndexRel; + uint32_t TlsOffsetRel; + unsigned GotEntrySize = 0; + unsigned GotPltEntrySize = 0; + unsigned PltEntrySize; + unsigned PltHeaderSize; + + // At least on x86_64 positions 1 and 2 are used by the first plt entry + // to support lazy loading. 
+ unsigned GotPltHeaderEntriesNum = 3; + + // Set to 0 for variant 2 + unsigned TcbSize = 0; + + bool NeedsThunks = false; + + virtual RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const; + virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; + virtual void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; + virtual void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; + virtual void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; + virtual void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; +}; + +uint64_t getPPC64TocBase(); +uint64_t getAArch64Page(uint64_t Expr); + +extern TargetInfo *Target; +TargetInfo *createTarget(); +} + +std::string toString(uint32_t RelType); +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Threads.h b/contrib/llvm/tools/lld/ELF/Threads.h new file mode 100644 index 000000000000..c03e15253e15 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Threads.h @@ -0,0 +1,90 @@ +//===- Threads.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LLD supports threads to distribute workloads to multiple cores. Using +// multicore is most effective when more than one core are idle. At the +// last step of a build, it is often the case that a linker is the only +// active process on a computer. So, we are naturally interested in using +// threads wisely to reduce latency to deliver results to users. +// +// That said, we don't want to do "too clever" things using threads. +// Complex multi-threaded algorithms are sometimes extremely hard to +// justify the correctness and can easily mess up the entire design. 
+// +// Fortunately, when a linker links large programs (when the link time is +// most critical), it spends most of its time working on a massive number of +// small pieces of data of the same kind, and there are opportunities for +// large parallelism there. Here are examples: +// +// - We have hundreds of thousands of input sections that need to be +// copied to a result file at the last step of link. Once we fix a file +// layout, each section can be copied to its destination and its +// relocations can be applied independently. +// +// - We have tens of millions of small strings when constructing a +// mergeable string section. +// +// For the cases such as the former, we can just use parallel_for_each +// instead of std::for_each (or a plain for loop). Because tasks are +// completely independent from each other, we can run them in parallel +// without any coordination between them. That's very easy to understand +// and justify. +// +// For the cases such as the latter, we can use parallel algorithms to +// deal with massive data. We have to write code for a tailored algorithm +// for each problem, but the complexity of multi-threading is isolated in +// a single pass and doesn't affect the linker's overall design. +// +// The above approach seems to be working fairly well. As an example, when +// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to +// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my +// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from +// 12.66 seconds to 7.95 seconds). Because of Amdahl's law, the +// speedup is not linear, but as you add more cores, it gets faster. +// +// On a final note, if you are trying to optimize, keep the axiom "don't +// guess, measure!" in mind. Some important passes of the linker are not +// that slow. For example, resolving all symbols is not a very heavy pass, +// although it would be very hard to parallelize it. 
You want to first +// identify a slow pass and then optimize it. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_THREADS_H +#define LLD_ELF_THREADS_H + +#include "Config.h" + +#include "lld/Core/Parallel.h" +#include <algorithm> +#include <functional> + +namespace lld { +namespace elf { + +template <class IterTy, class FuncTy> +void forEach(IterTy Begin, IterTy End, FuncTy Fn) { + if (Config->Threads) + parallel_for_each(Begin, End, Fn); + else + std::for_each(Begin, End, Fn); +} + +inline void forLoop(size_t Begin, size_t End, std::function<void(size_t)> Fn) { + if (Config->Threads) { + parallel_for(Begin, End, Fn); + } else { + for (size_t I = Begin; I < End; ++I) + Fn(I); + } +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Thunks.cpp b/contrib/llvm/tools/lld/ELF/Thunks.cpp new file mode 100644 index 000000000000..397a0ee66319 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Thunks.cpp @@ -0,0 +1,275 @@ +//===- Thunks.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains Thunk subclasses. +// +// A thunk is a small piece of code written after an input section +// which is used to jump between "incompatible" functions +// such as MIPS PIC and non-PIC or ARM non-Thumb and Thumb functions. +// +// If a jump target is too far and its address doesn't fit to a +// short jump instruction, we need to create a thunk too, but we +// haven't supported it yet. +// +// i386 and x86-64 don't need thunks. 
+// +//===---------------------------------------------------------------------===// + +#include "Thunks.h" +#include "Config.h" +#include "Error.h" +#include "InputSection.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Symbols.h" +#include "Target.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <cstdint> +#include <cstring> + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; + +namespace lld { +namespace elf { + +namespace { + +// Specific ARM Thunk implementations. The naming convention is: +// Source State, TargetState, Target Requirement, ABS or PI, Range +template <class ELFT> +class ARMToThumbV7ABSLongThunk final : public Thunk<ELFT> { +public: + ARMToThumbV7ABSLongThunk(const SymbolBody &Dest, + const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 12; } + void writeTo(uint8_t *Buf) const override; +}; + +template <class ELFT> class ARMToThumbV7PILongThunk final : public Thunk<ELFT> { +public: + ARMToThumbV7PILongThunk(const SymbolBody &Dest, + const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 16; } + void writeTo(uint8_t *Buf) const override; +}; + +template <class ELFT> +class ThumbToARMV7ABSLongThunk final : public Thunk<ELFT> { +public: + ThumbToARMV7ABSLongThunk(const SymbolBody &Dest, + const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 10; } + void writeTo(uint8_t *Buf) const override; +}; + +template <class ELFT> class ThumbToARMV7PILongThunk final : public Thunk<ELFT> { +public: + ThumbToARMV7PILongThunk(const SymbolBody &Dest, + const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 12; } + void 
writeTo(uint8_t *Buf) const override; +}; + +// MIPS LA25 thunk +template <class ELFT> class MipsThunk final : public Thunk<ELFT> { +public: + MipsThunk(const SymbolBody &Dest, const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 16; } + void writeTo(uint8_t *Buf) const override; +}; + +} // end anonymous namespace + +// ARM Target Thunks +template <class ELFT> static uint64_t getARMThunkDestVA(const SymbolBody &S) { + uint64_t V = S.isInPlt() ? S.getPltVA<ELFT>() : S.getVA<ELFT>(); + return SignExtend64<32>(V); +} + +template <class ELFT> +void ARMToThumbV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const { + const uint8_t Data[] = { + 0x00, 0xc0, 0x00, 0xe3, // movw ip,:lower16:S + 0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S + 0x1c, 0xff, 0x2f, 0xe1, // bx ip + }; + uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf, R_ARM_MOVW_ABS_NC, S); + Target->relocateOne(Buf + 4, R_ARM_MOVT_ABS, S); +} + +template <class ELFT> +void ThumbToARMV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const { + const uint8_t Data[] = { + 0x40, 0xf2, 0x00, 0x0c, // movw ip, :lower16:S + 0xc0, 0xf2, 0x00, 0x0c, // movt ip, :upper16:S + 0x60, 0x47, // bx ip + }; + uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf, R_ARM_THM_MOVW_ABS_NC, S); + Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_ABS, S); +} + +template <class ELFT> +void ARMToThumbV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const { + const uint8_t Data[] = { + 0xf0, 0xcf, 0x0f, 0xe3, // P: movw ip,:lower16:S - (P + (L1-P) +8) + 0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S - (P + (L1-P+4) +8) + 0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc + 0x1c, 0xff, 0x2f, 0xe1, // bx r12 + }; + uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + uint64_t P = this->getVA(); + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf, R_ARM_MOVW_PREL_NC, 
S - P - 16); + Target->relocateOne(Buf + 4, R_ARM_MOVT_PREL, S - P - 12); +} + +template <class ELFT> +void ThumbToARMV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const { + const uint8_t Data[] = { + 0x4f, 0xf6, 0xf4, 0x7c, // P: movw ip,:lower16:S - (P + (L1-P) + 4) + 0xc0, 0xf2, 0x00, 0x0c, // movt ip,:upper16:S - (P + (L1-P+4) + 4) + 0xfc, 0x44, // L1: add r12, pc + 0x60, 0x47, // bx r12 + }; + uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + uint64_t P = this->getVA(); + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf, R_ARM_THM_MOVW_PREL_NC, S - P - 12); + Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_PREL, S - P - 8); +} + +// Write MIPS LA25 thunk code to call PIC function from the non-PIC one. +template <class ELFT> void MipsThunk<ELFT>::writeTo(uint8_t *Buf) const { + const endianness E = ELFT::TargetEndianness; + + uint64_t S = this->Destination.template getVA<ELFT>(); + write32<E>(Buf, 0x3c190000); // lui $25, %hi(func) + write32<E>(Buf + 4, 0x08000000 | (S >> 2)); // j func + write32<E>(Buf + 8, 0x27390000); // addiu $25, $25, %lo(func) + write32<E>(Buf + 12, 0x00000000); // nop + Target->relocateOne(Buf, R_MIPS_HI16, S); + Target->relocateOne(Buf + 8, R_MIPS_LO16, S); +} + +template <class ELFT> +Thunk<ELFT>::Thunk(const SymbolBody &D, const InputSection<ELFT> &O) + : Destination(D), Owner(O), Offset(O.getThunkOff() + O.getThunksSize()) {} + +template <class ELFT> typename ELFT::uint Thunk<ELFT>::getVA() const { + return Owner.OutSec->Addr + Owner.OutSecOff + Offset; +} + +template <class ELFT> Thunk<ELFT>::~Thunk() = default; + +// Creates a thunk for Thumb-ARM interworking. +template <class ELFT> +static Thunk<ELFT> *createThunkArm(uint32_t Reloc, SymbolBody &S, + InputSection<ELFT> &IS) { + // ARM relocations need ARM to Thumb interworking Thunks. + // Thumb relocations need Thumb to ARM relocations. + // Use position independent Thunks if we require position independent code. 
+ switch (Reloc) { + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_JUMP24: + if (Config->Pic) + return new (BAlloc) ARMToThumbV7PILongThunk<ELFT>(S, IS); + return new (BAlloc) ARMToThumbV7ABSLongThunk<ELFT>(S, IS); + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + if (Config->Pic) + return new (BAlloc) ThumbToARMV7PILongThunk<ELFT>(S, IS); + return new (BAlloc) ThumbToARMV7ABSLongThunk<ELFT>(S, IS); + } + fatal("unrecognized relocation type"); +} + +template <class ELFT> +static void addThunkARM(uint32_t Reloc, SymbolBody &S, InputSection<ELFT> &IS) { + // Only one Thunk supported per symbol. + if (S.hasThunk<ELFT>()) + return; + + // ARM Thunks are added to the same InputSection as the relocation. This + // isn't strictly necessary but it makes it more likely that a limited range + // branch can reach the Thunk, and it makes Thunks to the PLT section easier + Thunk<ELFT> *T = createThunkArm(Reloc, S, IS); + IS.addThunk(T); + if (auto *Sym = dyn_cast<DefinedRegular<ELFT>>(&S)) + Sym->ThunkData = T; + else if (auto *Sym = dyn_cast<SharedSymbol<ELFT>>(&S)) + Sym->ThunkData = T; + else if (auto *Sym = dyn_cast<Undefined<ELFT>>(&S)) + Sym->ThunkData = T; + else + fatal("symbol not DefinedRegular or Shared"); +} + +template <class ELFT> +static void addThunkMips(uint32_t RelocType, SymbolBody &S, + InputSection<ELFT> &IS) { + // Only one Thunk supported per symbol. + if (S.hasThunk<ELFT>()) + return; + + // Mips Thunks are added to the InputSection defining S. 
+ auto *R = cast<DefinedRegular<ELFT>>(&S); + auto *Sec = cast<InputSection<ELFT>>(R->Section); + auto *T = new (BAlloc) MipsThunk<ELFT>(S, *Sec); + Sec->addThunk(T); + R->ThunkData = T; +} + +template <class ELFT> +void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &IS) { + if (Config->EMachine == EM_ARM) + addThunkARM<ELFT>(RelocType, S, IS); + else if (Config->EMachine == EM_MIPS) + addThunkMips<ELFT>(RelocType, S, IS); + else + llvm_unreachable("add Thunk only supported for ARM and Mips"); +} + +template void addThunk<ELF32LE>(uint32_t, SymbolBody &, + InputSection<ELF32LE> &); +template void addThunk<ELF32BE>(uint32_t, SymbolBody &, + InputSection<ELF32BE> &); +template void addThunk<ELF64LE>(uint32_t, SymbolBody &, + InputSection<ELF64LE> &); +template void addThunk<ELF64BE>(uint32_t, SymbolBody &, + InputSection<ELF64BE> &); + +template class Thunk<ELF32LE>; +template class Thunk<ELF32BE>; +template class Thunk<ELF64LE>; +template class Thunk<ELF64BE>; + +} // end namespace elf +} // end namespace lld diff --git a/contrib/llvm/tools/lld/ELF/Thunks.h b/contrib/llvm/tools/lld/ELF/Thunks.h new file mode 100644 index 000000000000..b937d7918491 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Thunks.h @@ -0,0 +1,56 @@ +//===- Thunks.h --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_THUNKS_H +#define LLD_ELF_THUNKS_H + +#include "Relocations.h" + +namespace lld { +namespace elf { +class SymbolBody; +template <class ELFT> class InputSection; + +// Class to describe an instance of a Thunk. +// A Thunk is a code-sequence inserted by the linker in between a caller and +// the callee. 
The relocation to the callee is redirected to the Thunk, which +// after executing transfers control to the callee. Typical uses of Thunks +// include transferring control from non-pi to pi and changing state on +// targets like ARM. +// +// Thunks can be created for DefinedRegular and Shared Symbols. The Thunk +// is stored in a field of the Symbol Destination. +// Thunks to be written to an InputSection are recorded by the InputSection. +template <class ELFT> class Thunk { + typedef typename ELFT::uint uintX_t; + +public: + Thunk(const SymbolBody &Destination, const InputSection<ELFT> &Owner); + virtual ~Thunk(); + + virtual uint32_t size() const { return 0; } + virtual void writeTo(uint8_t *Buf) const {} + uintX_t getVA() const; + +protected: + const SymbolBody &Destination; + const InputSection<ELFT> &Owner; + uint64_t Offset; +}; + +// For a Relocation to symbol S from InputSection Src, create a Thunk and +// update the fields of S and the InputSection that the Thunk body will be +// written to. At present there are implementations for ARM and Mips Thunks. +template <class ELFT> +void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &Src); + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Writer.cpp b/contrib/llvm/tools/lld/ELF/Writer.cpp new file mode 100644 index 000000000000..154de8cf6d18 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Writer.cpp @@ -0,0 +1,1723 @@ +//===- Writer.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Writer.h" +#include "Config.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Relocations.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include <climits> +#include <thread> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +namespace { +// The writer writes a SymbolTable result to a file. +template <class ELFT> class Writer { +public: + typedef typename ELFT::uint uintX_t; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Ehdr Elf_Ehdr; + typedef typename ELFT::Phdr Elf_Phdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::SymRange Elf_Sym_Range; + typedef typename ELFT::Rela Elf_Rela; + void run(); + +private: + void createSyntheticSections(); + void copyLocalSymbols(); + void addReservedSymbols(); + void addInputSec(InputSectionBase<ELFT> *S); + void createSections(); + void forEachRelSec(std::function<void(InputSectionBase<ELFT> &)> Fn); + void sortSections(); + void finalizeSections(); + void addPredefinedSections(); + + std::vector<PhdrEntry> createPhdrs(); + void removeEmptyPTLoad(); + void addPtArmExid(std::vector<PhdrEntry> &Phdrs); + void assignAddresses(); + void assignFileOffsets(); + void assignFileOffsetsBinary(); + void setPhdrs(); + void fixHeaders(); + void fixSectionAlignments(); + void fixAbsoluteSymbols(); + void openFile(); + void writeHeader(); + void writeSections(); + void writeSectionsBinary(); + void writeBuildId(); + + std::unique_ptr<FileOutputBuffer> Buffer; + + 
std::vector<OutputSectionBase *> OutputSections; + OutputSectionFactory<ELFT> Factory; + + void addRelIpltSymbols(); + void addStartEndSymbols(); + void addStartStopSymbols(OutputSectionBase *Sec); + uintX_t getEntryAddr(); + OutputSectionBase *findSection(StringRef Name); + + std::vector<PhdrEntry> Phdrs; + + uintX_t FileSize; + uintX_t SectionHeaderOff; + bool AllocateHeader = true; +}; +} // anonymous namespace + +StringRef elf::getOutputSectionName(StringRef Name) { + if (Config->Relocatable) + return Name; + + for (StringRef V : + {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.", + ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", + ".gcc_except_table.", ".tdata.", ".ARM.exidx."}) { + StringRef Prefix = V.drop_back(); + if (Name.startswith(V) || Name == Prefix) + return Prefix; + } + + // CommonSection is identified as "COMMON" in linker scripts. + // By default, it should go to .bss section. + if (Name == "COMMON") + return ".bss"; + + // ".zdebug_" is a prefix for ZLIB-compressed sections. + // Because we decompressed input sections, we want to remove 'z'. 
+ if (Name.startswith(".zdebug_")) + return Saver.save(Twine(".") + Name.substr(2)); + return Name; +} + +template <class ELFT> void elf::reportDiscarded(InputSectionBase<ELFT> *IS) { + if (!Config->PrintGcSections) + return; + errs() << "removing unused section from '" << IS->Name << "' in file '" + << IS->getFile()->getName() << "'\n"; +} + +template <class ELFT> static bool needsInterpSection() { + return !Symtab<ELFT>::X->getSharedFiles().empty() && + !Config->DynamicLinker.empty() && + !Script<ELFT>::X->ignoreInterpSection(); +} + +template <class ELFT> void elf::writeResult() { Writer<ELFT>().run(); } + +template <class ELFT> void Writer<ELFT>::removeEmptyPTLoad() { + auto I = std::remove_if(Phdrs.begin(), Phdrs.end(), [&](const PhdrEntry &P) { + if (P.p_type != PT_LOAD) + return false; + if (!P.First) + return true; + uintX_t Size = P.Last->Addr + P.Last->Size - P.First->Addr; + return Size == 0; + }); + Phdrs.erase(I, Phdrs.end()); +} + +// The main function of the writer. +template <class ELFT> void Writer<ELFT>::run() { + // Create linker-synthesized sections such as .got or .plt. + // Such sections are of type input section. + createSyntheticSections(); + + // We need to create some reserved symbols such as _end. Create them. + if (!Config->Relocatable) + addReservedSymbols(); + + // Some architectures use small displacements for jump instructions. + // It is linker's responsibility to create thunks containing long + // jump instructions if jump targets are too far. Create thunks. + if (Target->NeedsThunks) + forEachRelSec(createThunks<ELFT>); + + // Create output sections. + Script<ELFT>::X->OutputSections = &OutputSections; + if (ScriptConfig->HasSections) { + // If linker script contains SECTIONS commands, let it create sections. + Script<ELFT>::X->processCommands(Factory); + + // Linker scripts may have left some input sections unassigned. + // Assign such sections using the default rule. 
+ Script<ELFT>::X->addOrphanSections(Factory); + } else { + // If linker script does not contain SECTIONS commands, create + // output sections by default rules. We still need to give the + // linker script a chance to run, because it might contain + // non-SECTIONS commands such as ASSERT. + createSections(); + Script<ELFT>::X->processCommands(Factory); + } + + if (Config->Discard != DiscardPolicy::All) + copyLocalSymbols(); + + // Now that we have a complete set of output sections. This function + // completes section contents. For example, we need to add strings + // to the string table, and add entries to .got and .plt. + // finalizeSections does that. + finalizeSections(); + if (ErrorCount) + return; + + if (Config->Relocatable) { + assignFileOffsets(); + } else { + if (ScriptConfig->HasSections) { + Script<ELFT>::X->assignAddresses(Phdrs); + } else { + fixSectionAlignments(); + assignAddresses(); + } + + // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a + // 0 sized region. This has to be done late since only after assignAddresses + // we know the size of the sections. + removeEmptyPTLoad(); + + if (!Config->OFormatBinary) + assignFileOffsets(); + else + assignFileOffsetsBinary(); + + setPhdrs(); + fixAbsoluteSymbols(); + } + + // Write the result down to a file. + openFile(); + if (ErrorCount) + return; + if (!Config->OFormatBinary) { + writeHeader(); + writeSections(); + } else { + writeSectionsBinary(); + } + + // Backfill .note.gnu.build-id section content. This is done at last + // because the content is usually a hash value of the entire output file. + writeBuildId(); + if (ErrorCount) + return; + + if (auto EC = Buffer->commit()) + error(EC, "failed to write to the output file"); + + // Flush the output streams and exit immediately. A full shutdown + // is a good test that we are keeping track of all allocated memory, + // but actually freeing it is a waste of time in a regular linker run. 
+ if (Config->ExitEarly) + exitLld(0); +} + +// Initialize Out<ELFT> members. +template <class ELFT> void Writer<ELFT>::createSyntheticSections() { + // Initialize all pointers with NULL. This is needed because + // you can call lld::elf::main more than once as a library. + memset(&Out<ELFT>::First, 0, sizeof(Out<ELFT>)); + + // Create singleton output sections. + Out<ELFT>::Bss = + make<OutputSection<ELFT>>(".bss", SHT_NOBITS, SHF_ALLOC | SHF_WRITE); + In<ELFT>::DynStrTab = make<StringTableSection<ELFT>>(".dynstr", true); + In<ELFT>::Dynamic = make<DynamicSection<ELFT>>(); + Out<ELFT>::EhFrame = make<EhOutputSection<ELFT>>(); + In<ELFT>::RelaDyn = make<RelocationSection<ELFT>>( + Config->Rela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc); + In<ELFT>::ShStrTab = make<StringTableSection<ELFT>>(".shstrtab", false); + + Out<ELFT>::ElfHeader = make<OutputSectionBase>("", 0, SHF_ALLOC); + Out<ELFT>::ElfHeader->Size = sizeof(Elf_Ehdr); + Out<ELFT>::ProgramHeaders = make<OutputSectionBase>("", 0, SHF_ALLOC); + Out<ELFT>::ProgramHeaders->updateAlignment(sizeof(uintX_t)); + + if (needsInterpSection<ELFT>()) { + In<ELFT>::Interp = createInterpSection<ELFT>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Interp); + } else { + In<ELFT>::Interp = nullptr; + } + + if (!Config->Relocatable) + Symtab<ELFT>::X->Sections.push_back(createCommentSection<ELFT>()); + + if (Config->Strip != StripPolicy::All) { + In<ELFT>::StrTab = make<StringTableSection<ELFT>>(".strtab", false); + In<ELFT>::SymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::StrTab); + } + + if (Config->BuildId != BuildIdKind::None) { + In<ELFT>::BuildId = make<BuildIdSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::BuildId); + } + + InputSection<ELFT> *Common = createCommonSection<ELFT>(); + if (!Common->Data.empty()) { + In<ELFT>::Common = Common; + Symtab<ELFT>::X->Sections.push_back(Common); + } + + // Add MIPS-specific sections. 
+ bool HasDynSymTab = !Symtab<ELFT>::X->getSharedFiles().empty() || Config->Pic; + if (Config->EMachine == EM_MIPS) { + if (!Config->Shared && HasDynSymTab) { + In<ELFT>::MipsRldMap = make<MipsRldMapSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::MipsRldMap); + } + if (auto *Sec = MipsAbiFlagsSection<ELFT>::create()) + Symtab<ELFT>::X->Sections.push_back(Sec); + if (auto *Sec = MipsOptionsSection<ELFT>::create()) + Symtab<ELFT>::X->Sections.push_back(Sec); + if (auto *Sec = MipsReginfoSection<ELFT>::create()) + Symtab<ELFT>::X->Sections.push_back(Sec); + } + + if (HasDynSymTab) { + In<ELFT>::DynSymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::DynStrTab); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::DynSymTab); + + In<ELFT>::VerSym = make<VersionTableSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerSym); + + if (!Config->VersionDefinitions.empty()) { + In<ELFT>::VerDef = make<VersionDefinitionSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerDef); + } + + In<ELFT>::VerNeed = make<VersionNeedSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerNeed); + + if (Config->GnuHash) { + In<ELFT>::GnuHashTab = make<GnuHashTableSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GnuHashTab); + } + + if (Config->SysvHash) { + In<ELFT>::HashTab = make<HashTableSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::HashTab); + } + + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Dynamic); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::DynStrTab); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaDyn); + } + + // Add .got. MIPS' .got is so different from the other archs, + // it has its own class. 
+ if (Config->EMachine == EM_MIPS) { + In<ELFT>::MipsGot = make<MipsGotSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::MipsGot); + } else { + In<ELFT>::Got = make<GotSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Got); + } + + In<ELFT>::GotPlt = make<GotPltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GotPlt); + In<ELFT>::IgotPlt = make<IgotPltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::IgotPlt); + + if (Config->GdbIndex) { + In<ELFT>::GdbIndex = make<GdbIndexSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GdbIndex); + } + + // We always need to add rel[a].plt to output if it has entries. + // Even for static linking it can contain R_[*]_IRELATIVE relocations. + In<ELFT>::RelaPlt = make<RelocationSection<ELFT>>( + Config->Rela ? ".rela.plt" : ".rel.plt", false /*Sort*/); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaPlt); + + // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure + // that the IRelative relocations are processed last by the dynamic loader + In<ELFT>::RelaIplt = make<RelocationSection<ELFT>>( + (Config->EMachine == EM_ARM) ? ".rel.dyn" : In<ELFT>::RelaPlt->Name, + false /*Sort*/); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaIplt); + + In<ELFT>::Plt = make<PltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Plt); + In<ELFT>::Iplt = make<IpltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Iplt); + + if (Config->EhFrameHdr) { + In<ELFT>::EhFrameHdr = make<EhFrameHeader<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::EhFrameHdr); + } +} + +template <class ELFT> +static bool shouldKeepInSymtab(InputSectionBase<ELFT> *Sec, StringRef SymName, + const SymbolBody &B) { + if (B.isFile()) + return false; + + // We keep sections in symtab for relocatable output. + if (B.isSection()) + return Config->Relocatable; + + // If sym references a section in a discarded group, don't keep it. 
+ if (Sec == &InputSection<ELFT>::Discarded) + return false; + + if (Config->Discard == DiscardPolicy::None) + return true; + + // In ELF assembly .L symbols are normally discarded by the assembler. + // If the assembler fails to do so, the linker discards them if + // * --discard-locals is used. + // * The symbol is in a SHF_MERGE section, which is normally the reason for + // the assembler keeping the .L symbol. + if (!SymName.startswith(".L") && !SymName.empty()) + return true; + + if (Config->Discard == DiscardPolicy::Locals) + return false; + + return !Sec || !(Sec->Flags & SHF_MERGE); +} + +template <class ELFT> static bool includeInSymtab(const SymbolBody &B) { + if (!B.isLocal() && !B.symbol()->IsUsedInRegularObj) + return false; + + // If --retain-symbols-file is given, we'll keep only symbols listed in that + // file. + if (Config->Discard == DiscardPolicy::RetainFile && + !Config->RetainSymbolsFile.count(B.getName())) + return false; + + if (auto *D = dyn_cast<DefinedRegular<ELFT>>(&B)) { + // Always include absolute symbols. + if (!D->Section) + return true; + // Exclude symbols pointing to garbage-collected sections. + if (!D->Section->Live) + return false; + if (auto *S = dyn_cast<MergeInputSection<ELFT>>(D->Section)) + if (!S->getSectionPiece(D->Value)->Live) + return false; + } + return true; +} + +// Local symbols are not in the linker's symbol table. This function scans +// each object file's symbol table to copy local symbols to the output. +template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { + if (!In<ELFT>::SymTab) + return; + for (elf::ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) { + for (SymbolBody *B : F->getLocalSymbols()) { + if (!B->IsLocal) + fatal(toString(F) + + ": broken object: getLocalSymbols returns a non-local symbol"); + auto *DR = dyn_cast<DefinedRegular<ELFT>>(B); + + // No reason to keep local undefined symbol in symtab. 
+ if (!DR) + continue; + if (!includeInSymtab<ELFT>(*B)) + continue; + + InputSectionBase<ELFT> *Sec = DR->Section; + if (!shouldKeepInSymtab<ELFT>(Sec, B->getName(), *B)) + continue; + ++In<ELFT>::SymTab->NumLocals; + if (Config->Relocatable) + B->DynsymIndex = In<ELFT>::SymTab->NumLocals; + F->KeptLocalSyms.push_back(std::make_pair( + DR, In<ELFT>::SymTab->StrTabSec.addString(B->getName()))); + } + } +} + +// PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections that +// we would like to make sure appear is a specific order to maximize their +// coverage by a single signed 16-bit offset from the TOC base pointer. +// Conversely, the special .tocbss section should be first among all SHT_NOBITS +// sections. This will put it next to the loaded special PPC64 sections (and, +// thus, within reach of the TOC base pointer). +static int getPPC64SectionRank(StringRef SectionName) { + return StringSwitch<int>(SectionName) + .Case(".tocbss", 0) + .Case(".branch_lt", 2) + .Case(".toc", 3) + .Case(".toc1", 4) + .Case(".opd", 5) + .Default(1); +} + +template <class ELFT> bool elf::isRelroSection(const OutputSectionBase *Sec) { + if (!Config->ZRelro) + return false; + uint64_t Flags = Sec->Flags; + if (!(Flags & SHF_ALLOC) || !(Flags & SHF_WRITE)) + return false; + if (Flags & SHF_TLS) + return true; + uint32_t Type = Sec->Type; + if (Type == SHT_INIT_ARRAY || Type == SHT_FINI_ARRAY || + Type == SHT_PREINIT_ARRAY) + return true; + if (Sec == In<ELFT>::GotPlt->OutSec) + return Config->ZNow; + if (Sec == In<ELFT>::Dynamic->OutSec) + return true; + if (In<ELFT>::Got && Sec == In<ELFT>::Got->OutSec) + return true; + if (In<ELFT>::MipsGot && Sec == In<ELFT>::MipsGot->OutSec) + return true; + StringRef S = Sec->getName(); + return S == ".data.rel.ro" || S == ".ctors" || S == ".dtors" || S == ".jcr" || + S == ".eh_frame" || S == ".openbsd.randomdata"; +} + +template <class ELFT> +static bool compareSectionsNonScript(const OutputSectionBase *A, + const 
OutputSectionBase *B) { + // Put .interp first because some loaders want to see that section + // on the first page of the executable file when loaded into memory. + bool AIsInterp = A->getName() == ".interp"; + bool BIsInterp = B->getName() == ".interp"; + if (AIsInterp != BIsInterp) + return AIsInterp; + + // Allocatable sections go first to reduce the total PT_LOAD size and + // so debug info doesn't change addresses in actual code. + bool AIsAlloc = A->Flags & SHF_ALLOC; + bool BIsAlloc = B->Flags & SHF_ALLOC; + if (AIsAlloc != BIsAlloc) + return AIsAlloc; + + // We don't have any special requirements for the relative order of two non + // allocatable sections. + if (!AIsAlloc) + return false; + + // We want to put section specified by -T option first, so we + // can start assigning VA starting from them later. + auto AAddrSetI = Config->SectionStartMap.find(A->getName()); + auto BAddrSetI = Config->SectionStartMap.find(B->getName()); + bool AHasAddrSet = AAddrSetI != Config->SectionStartMap.end(); + bool BHasAddrSet = BAddrSetI != Config->SectionStartMap.end(); + if (AHasAddrSet != BHasAddrSet) + return AHasAddrSet; + if (AHasAddrSet) + return AAddrSetI->second < BAddrSetI->second; + + // We want the read only sections first so that they go in the PT_LOAD + // covering the program headers at the start of the file. + bool AIsWritable = A->Flags & SHF_WRITE; + bool BIsWritable = B->Flags & SHF_WRITE; + if (AIsWritable != BIsWritable) + return BIsWritable; + + if (!Config->SingleRoRx) { + // For a corresponding reason, put non exec sections first (the program + // header PT_LOAD is not executable). + // We only do that if we are not using linker scripts, since with linker + // scripts ro and rx sections are in the same PT_LOAD, so their relative + // order is not important. The same applies for -no-rosegment. 
+ bool AIsExec = A->Flags & SHF_EXECINSTR; + bool BIsExec = B->Flags & SHF_EXECINSTR; + if (AIsExec != BIsExec) + return BIsExec; + } + + // If we got here we know that both A and B are in the same PT_LOAD. + + // The TLS initialization block needs to be a single contiguous block in a R/W + // PT_LOAD, so stick TLS sections directly before R/W sections. The TLS NOBITS + // sections are placed here as they don't take up virtual address space in the + // PT_LOAD. + bool AIsTls = A->Flags & SHF_TLS; + bool BIsTls = B->Flags & SHF_TLS; + if (AIsTls != BIsTls) + return AIsTls; + + // The next requirement we have is to put nobits sections last. The + // reason is that the only thing the dynamic linker will see about + // them is a p_memsz that is larger than p_filesz. Seeing that it + // zeros the end of the PT_LOAD, so that has to correspond to the + // nobits sections. + bool AIsNoBits = A->Type == SHT_NOBITS; + bool BIsNoBits = B->Type == SHT_NOBITS; + if (AIsNoBits != BIsNoBits) + return BIsNoBits; + + // We place RelRo section before plain r/w ones. + bool AIsRelRo = isRelroSection<ELFT>(A); + bool BIsRelRo = isRelroSection<ELFT>(B); + if (AIsRelRo != BIsRelRo) + return AIsRelRo; + + // Some architectures have additional ordering restrictions for sections + // within the same PT_LOAD. + if (Config->EMachine == EM_PPC64) + return getPPC64SectionRank(A->getName()) < + getPPC64SectionRank(B->getName()); + + return false; +} + +// Output section ordering is determined by this function. +template <class ELFT> +static bool compareSections(const OutputSectionBase *A, + const OutputSectionBase *B) { + // For now, put sections mentioned in a linker script first. + int AIndex = Script<ELFT>::X->getSectionIndex(A->getName()); + int BIndex = Script<ELFT>::X->getSectionIndex(B->getName()); + bool AInScript = AIndex != INT_MAX; + bool BInScript = BIndex != INT_MAX; + if (AInScript != BInScript) + return AInScript; + // If both are in the script, use that order. 
  if (AInScript)
    return AIndex < BIndex;

  return compareSectionsNonScript<ELFT>(A, B);
}

// Program header entry
PhdrEntry::PhdrEntry(unsigned Type, unsigned Flags) {
  p_type = Type;
  p_flags = Flags;
}

// Appends Sec to this program header: Sec becomes the last section (and the
// first one if the header was empty), and the header alignment is raised to
// the section alignment if that is larger.
void PhdrEntry::add(OutputSectionBase *Sec) {
  Last = Sec;
  if (!First)
    First = Sec;
  p_align = std::max(p_align, Sec->Addralign);
  // Sections in a PT_LOAD remember the first section of that segment.
  if (p_type == PT_LOAD)
    Sec->FirstInPtLoad = First;
}

// Defines the synthetic symbol Name (relative to Sec, with value Val) only if
// the symbol table already contains Name as an undefined or shared symbol.
template <class ELFT>
static void addOptionalSynthetic(StringRef Name, OutputSectionBase *Sec,
                                 typename ELFT::uint Val,
                                 uint8_t StOther = STV_HIDDEN) {
  if (SymbolBody *S = Symtab<ELFT>::X->find(Name))
    if (S->isUndefined() || S->isShared())
      Symtab<ELFT>::X->addSynthetic(Name, Sec, Val, StOther);
}

// Adds a hidden, untyped, zero-sized weak symbol Name at offset Value in Sec.
template <class ELFT>
static Symbol *addRegular(StringRef Name, InputSectionBase<ELFT> *Sec,
                          typename ELFT::uint Value) {
  // The linker generated symbols are added as STB_WEAK to allow user defined
  // ones to override them.
  return Symtab<ELFT>::X->addRegular(Name, STV_HIDDEN, STT_NOTYPE, Value,
                                     /*Size=*/0, STB_WEAK, Sec,
                                     /*File=*/nullptr);
}

// Like addRegular, but only acts if Name is already in the symbol table.
// Returns nullptr if Name is unknown, the existing symbol if it is already
// defined (neither undefined nor shared), and the new symbol otherwise.
template <class ELFT>
static Symbol *addOptionalRegular(StringRef Name, InputSectionBase<ELFT> *IS,
                                  typename ELFT::uint Value) {
  SymbolBody *S = Symtab<ELFT>::X->find(Name);
  if (!S)
    return nullptr;
  if (!S->isUndefined() && !S->isShared())
    return S->symbol();
  return addRegular(Name, IS, Value);
}

// The beginning and the ending of .rel[a].plt section are marked
// with __rel[a]_iplt_{start,end} symbols if it is a statically linked
// executable. The runtime needs these symbols in order to resolve
// all IRELATIVE relocs on startup. For dynamic executables, we don't
// need these symbols, since IRELATIVE relocs are resolved through GOT
// and PLT. For details, see http://www.airs.com/blog/archives/403.
template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() {
  if (In<ELFT>::DynSymTab)
    return;
  StringRef S = Config->Rela ? "__rela_iplt_start" : "__rel_iplt_start";
  addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, 0);

  // The end symbol is added with the value -1.
  S = Config->Rela ? "__rela_iplt_end" : "__rel_iplt_end";
  addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, -1);
}

// The linker is expected to define some symbols depending on
// the linking result. This function defines such symbols.
template <class ELFT> void Writer<ELFT>::addReservedSymbols() {
  if (Config->EMachine == EM_MIPS) {
    // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer
    // so that it points to an absolute address which by default is relative
    // to GOT. Default offset is 0x7ff0.
    // See "Global Data Symbols" in Chapter 6 in the following document:
    // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
    ElfSym<ELFT>::MipsGp =
        Symtab<ELFT>::X->addAbsolute("_gp", STV_HIDDEN, STB_LOCAL);

    // On MIPS O32 ABI, _gp_disp is a magic symbol that designates the offset
    // between the start of a function and the 'gp' pointer into GOT. To
    // simplify relocation calculation we assign _gp value to it and calculate
    // corresponding relocations as relative to this value.
    if (Symtab<ELFT>::X->find("_gp_disp"))
      ElfSym<ELFT>::MipsGpDisp =
          Symtab<ELFT>::X->addAbsolute("_gp_disp", STV_HIDDEN, STB_LOCAL);

    // The __gnu_local_gp is a magic symbol equal to the current value of 'gp'
    // pointer. This symbol is used in the code generated by .cpload pseudo-op
    // in case of using -mno-shared option.
    // https://sourceware.org/ml/binutils/2004-12/msg00094.html
    if (Symtab<ELFT>::X->find("__gnu_local_gp"))
      ElfSym<ELFT>::MipsLocalGp =
          Symtab<ELFT>::X->addAbsolute("__gnu_local_gp", STV_HIDDEN, STB_LOCAL);
  }

  // In the assembly for 32 bit x86 the _GLOBAL_OFFSET_TABLE_ symbol
  // is magical and is used to produce a R_386_GOTPC relocation.
  // The R_386_GOTPC relocation value doesn't actually depend on the
  // symbol value, so it could use an index of STN_UNDEF which, according
  // to the spec, means the symbol value is 0.
  // Unfortunately both gas and MC keep the _GLOBAL_OFFSET_TABLE_ symbol in
  // the object file.
  // The situation is even stranger on x86_64 where the assembly doesn't
  // need the magical symbol, but gas still puts _GLOBAL_OFFSET_TABLE_ as
  // an undefined symbol in the .o files.
  // Given that the symbol is effectively unused, we just create a dummy
  // hidden one to avoid the undefined symbol error.
  Symtab<ELFT>::X->addIgnored("_GLOBAL_OFFSET_TABLE_");

  // __tls_get_addr is defined by the dynamic linker for dynamic ELFs. For
  // static linking the linker is required to optimize away any references to
  // __tls_get_addr, so it's not defined anywhere. Create a hidden definition
  // to avoid the undefined symbol error. As usual special cases are ARM and
  // MIPS - the libc for these targets defines __tls_get_addr itself because
  // there are no TLS optimizations for these targets.
  if (!In<ELFT>::DynSymTab &&
      (Config->EMachine != EM_MIPS && Config->EMachine != EM_ARM))
    Symtab<ELFT>::X->addIgnored("__tls_get_addr");

  // If the linker script does the layout, we do not need to create any
  // standard symbols.
  if (ScriptConfig->HasSections)
    return;

  ElfSym<ELFT>::EhdrStart = Symtab<ELFT>::X->addIgnored("__ehdr_start");

  // Defines the reserved symbol S (stored in Sym1) and, if it is referenced,
  // the same name without its leading underscore (stored in Sym2).
  auto Define = [this](StringRef S, DefinedRegular<ELFT> *&Sym1,
                       DefinedRegular<ELFT> *&Sym2) {
    Sym1 = Symtab<ELFT>::X->addIgnored(S, STV_DEFAULT);

    // The name without the underscore is not a reserved name,
    // so it is defined only when there is a reference against it.
    assert(S.startswith("_"));
    S = S.substr(1);
    if (SymbolBody *B = Symtab<ELFT>::X->find(S))
      if (B->isUndefined())
        Sym2 = Symtab<ELFT>::X->addAbsolute(S, STV_DEFAULT);
  };

  Define("_end", ElfSym<ELFT>::End, ElfSym<ELFT>::End2);
  Define("_etext", ElfSym<ELFT>::Etext, ElfSym<ELFT>::Etext2);
  Define("_edata", ElfSym<ELFT>::Edata, ElfSym<ELFT>::Edata2);
}

// Sort input sections by section name suffixes for
// __attribute__((init_priority(N))).
template <class ELFT> static void sortInitFini(OutputSectionBase *S) {
  // A null S (section not present) is silently ignored.
  if (S)
    reinterpret_cast<OutputSection<ELFT> *>(S)->sortInitFini();
}

// Sort input sections by the special rule for .ctors and .dtors.
template <class ELFT> static void sortCtorsDtors(OutputSectionBase *S) {
  // A null S (section not present) is silently ignored.
  if (S)
    reinterpret_cast<OutputSection<ELFT> *>(S)->sortCtorsDtors();
}

// Sort input sections using the list provided by --symbol-ordering-file.
template <class ELFT>
static void sortBySymbolsOrder(ArrayRef<OutputSectionBase *> OutputSections) {
  if (Config->SymbolOrderingFile.empty())
    return;

  // Build a map from symbols to their priorities. Symbols that didn't
  // appear in the symbol ordering file have the lowest priority 0.
  // All explicitly mentioned symbols have negative (higher) priorities.
  DenseMap<StringRef, int> SymbolOrder;
  int Priority = -Config->SymbolOrderingFile.size();
  for (StringRef S : Config->SymbolOrderingFile)
    SymbolOrder.insert({S, Priority++});

  // Build a map from sections to their priorities.
  DenseMap<InputSectionBase<ELFT> *, int> SectionOrder;
  for (elf::ObjectFile<ELFT> *File : Symtab<ELFT>::X->getObjectFiles()) {
    for (SymbolBody *Body : File->getSymbols()) {
      auto *D = dyn_cast<DefinedRegular<ELFT>>(Body);
      if (!D || !D->Section)
        continue;
      // A section takes the smallest (i.e. highest) priority among the
      // symbols defined in it. Note that this Priority shadows the counter
      // declared above.
      int &Priority = SectionOrder[D->Section];
      Priority = std::min(Priority, SymbolOrder.lookup(D->getName()));
    }
  }

  // Sort sections by priority.
  for (OutputSectionBase *Base : OutputSections)
    if (auto *Sec = dyn_cast<OutputSection<ELFT>>(Base))
      Sec->sort([&](InputSection<ELFT> *S) { return SectionOrder.lookup(S); });
}

// Calls Fn on every live, SHF_ALLOC input section that is either a regular
// InputSection or an EhInputSection.
template <class ELFT>
void Writer<ELFT>::forEachRelSec(
    std::function<void(InputSectionBase<ELFT> &)> Fn) {
  for (InputSectionBase<ELFT> *IS : Symtab<ELFT>::X->Sections) {
    if (!IS->Live)
      continue;
    // Scan all relocations. Each relocation goes through a series
    // of tests to determine if it needs special treatment, such as
    // creating GOT, PLT, copy relocations, etc.
    // Note that relocations for non-alloc sections are directly
    // processed by InputSection::relocateNonAlloc.
    if (!(IS->Flags & SHF_ALLOC))
      continue;
    if (isa<InputSection<ELFT>>(IS) || isa<EhInputSection<ELFT>>(IS))
      Fn(*IS);
  }
}

// Adds the input section IS to its output section, creating the output
// section (and registering it in OutputSections) if it does not exist yet.
// Null sections are ignored; non-live sections are only reported as discarded.
template <class ELFT>
void Writer<ELFT>::addInputSec(InputSectionBase<ELFT> *IS) {
  if (!IS)
    return;

  if (!IS->Live) {
    reportDiscarded(IS);
    return;
  }
  OutputSectionBase *Sec;
  bool IsNew;
  StringRef OutsecName = getOutputSectionName(IS->Name);
  std::tie(Sec, IsNew) = Factory.create(IS, OutsecName);
  if (IsNew)
    OutputSections.push_back(Sec);
  Sec->addSection(IS);
}

// Creates output sections from all input sections, applies the symbol
// ordering and the init/fini/ctors/dtors sorting rules, then assigns offsets
// within each output section.
template <class ELFT> void Writer<ELFT>::createSections() {
  for (InputSectionBase<ELFT> *IS : Symtab<ELFT>::X->Sections)
    addInputSec(IS);

  sortBySymbolsOrder<ELFT>(OutputSections);
  sortInitFini<ELFT>(findSection(".init_array"));
  sortInitFini<ELFT>(findSection(".fini_array"));
  sortCtorsDtors<ELFT>(findSection(".ctors"));
  sortCtorsDtors<ELFT>(findSection(".dtors"));

  for (OutputSectionBase *Sec : OutputSections)
    Sec->assignOffsets();
}

// Returns true if S1 and S2 may be placed in the same PT_LOAD: both must be
// SHF_ALLOC and agree on writability; writable sections must additionally
// agree on SHF_EXECINSTR.
template <class ELFT>
static bool canSharePtLoad(const OutputSectionBase &S1,
                           const OutputSectionBase &S2) {
  if (!(S1.Flags & SHF_ALLOC) || !(S2.Flags & SHF_ALLOC))
    return false;

  bool S1IsWrite = S1.Flags & SHF_WRITE;
  bool S2IsWrite = S2.Flags & SHF_WRITE;
  if (S1IsWrite != S2IsWrite)
    return false;

  if (!S1IsWrite)
    return true; // RO and RX share a PT_LOAD with linker scripts.
  return (S1.Flags & SHF_EXECINSTR) == (S2.Flags & SHF_EXECINSTR);
}

// Puts OutputSections into their final order. Nothing is done for -r.
// Without a SECTIONS command the sections are stable-sorted with
// compareSectionsNonScript; otherwise see the long comment below.
template <class ELFT> void Writer<ELFT>::sortSections() {
  // Don't sort if using -r. It is not necessary and we want to preserve the
  // relative order for SHF_LINK_ORDER sections.
  if (Config->Relocatable)
    return;
  if (!ScriptConfig->HasSections) {
    std::stable_sort(OutputSections.begin(), OutputSections.end(),
                     compareSectionsNonScript<ELFT>);
    return;
  }
  Script<ELFT>::X->adjustSectionsBeforeSorting();

  // The order of the sections in the script is arbitrary and may not agree with
  // compareSectionsNonScript. This means that we cannot easily define a
  // strict weak ordering. To see why, consider a comparison of a section in the
  // script and one not in the script. We have two simple options:
  // * Make them equivalent (a is not less than b, and b is not less than a).
  //   The problem is then that equivalence has to be transitive and we can
  //   have sections a, b and c with only b in a script and a less than c
  //   which breaks this property.
  // * Use compareSectionsNonScript. Given that the script order doesn't have
  //   to match, we can end up with sections a, b, c, d where b and c are in the
  //   script and c is compareSectionsNonScript less than b. In which case d
  //   can be equivalent to c, a to b and d < a. As a concrete example:
  //   .a (rx) # not in script
  //   .b (rx) # in script
  //   .c (ro) # in script
  //   .d (ro) # not in script
  //
  // The way we define an order then is:
  // *  First put script sections at the start and sort the script and
  //    non-script sections independently.
  // *  Move each non-script section to its preferred position. We try
  //    to put each section in the last position where it can share
  //    a PT_LOAD.

  std::stable_sort(OutputSections.begin(), OutputSections.end(),
                   compareSections<ELFT>);

  auto I = OutputSections.begin();
  auto E = OutputSections.end();
  // A section index of INT_MAX marks a section that is not in the script.
  auto NonScriptI =
      std::find_if(OutputSections.begin(), E, [](OutputSectionBase *S) {
        return Script<ELFT>::X->getSectionIndex(S->getName()) == INT_MAX;
      });
  while (NonScriptI != E) {
    // Pick the best already-placed neighbour for *NonScriptI. A and B are
    // taken by reference so that their positions in OutputSections can be
    // recovered from their addresses.
    auto BestPos = std::max_element(
        I, NonScriptI, [&](OutputSectionBase *&A, OutputSectionBase *&B) {
          bool ACanSharePtLoad = canSharePtLoad<ELFT>(**NonScriptI, *A);
          bool BCanSharePtLoad = canSharePtLoad<ELFT>(**NonScriptI, *B);
          if (ACanSharePtLoad != BCanSharePtLoad)
            return BCanSharePtLoad;

          bool ACmp = compareSectionsNonScript<ELFT>(*NonScriptI, A);
          bool BCmp = compareSectionsNonScript<ELFT>(*NonScriptI, B);
          if (ACmp != BCmp)
            return BCmp; // FIXME: missing test

          size_t PosA = &A - &OutputSections[0];
          size_t PosB = &B - &OutputSections[0];
          return ACmp ? PosA > PosB : PosA < PosB;
        });

    // max_element only returns NonScriptI if the range is empty. If the range
    // is not empty we should consider moving the element forward one
    // position.
    if (BestPos != NonScriptI &&
        !compareSectionsNonScript<ELFT>(*NonScriptI, *BestPos))
      ++BestPos;
    // Move *NonScriptI to BestPos, shifting the sections in between by one.
    std::rotate(BestPos, NonScriptI, NonScriptI + 1);
    ++NonScriptI;
  }

  Script<ELFT>::X->adjustSectionsAfterSorting();
}

// Finalizes each non-null, non-empty synthetic section that has an output
// section, in the given order, and recomputes the layout of its output
// section afterwards.
template <class ELFT>
static void
finalizeSynthetic(const std::vector<SyntheticSection<ELFT> *> &Sections) {
  for (SyntheticSection<ELFT> *SS : Sections)
    if (SS && SS->OutSec && !SS->empty()) {
      SS->finalize();
      SS->OutSec->Size = 0;
      SS->OutSec->assignOffsets();
    }
}

// We need to add input synthetic sections early in createSyntheticSections()
// to make them visible from the linker script side. But not all sections are
// always required to be in output. For example we don't need dynamic section
// content sometimes. This function filters out such unused sections from
// output.
+template <class ELFT> +static void removeUnusedSyntheticSections(std::vector<OutputSectionBase *> &V) { + // Input synthetic sections are placed after all regular ones. We iterate over + // them all and exit at first non-synthetic. + for (InputSectionBase<ELFT> *S : llvm::reverse(Symtab<ELFT>::X->Sections)) { + SyntheticSection<ELFT> *SS = dyn_cast<SyntheticSection<ELFT>>(S); + if (!SS) + return; + if (!SS->empty() || !SS->OutSec) + continue; + + OutputSection<ELFT> *OutSec = cast<OutputSection<ELFT>>(SS->OutSec); + OutSec->Sections.erase( + std::find(OutSec->Sections.begin(), OutSec->Sections.end(), SS)); + // If there is no other sections in output section, remove it from output. + if (OutSec->Sections.empty()) + V.erase(std::find(V.begin(), V.end(), OutSec)); + } +} + +// Create output section objects and add them to OutputSections. +template <class ELFT> void Writer<ELFT>::finalizeSections() { + Out<ELFT>::DebugInfo = findSection(".debug_info"); + Out<ELFT>::PreinitArray = findSection(".preinit_array"); + Out<ELFT>::InitArray = findSection(".init_array"); + Out<ELFT>::FiniArray = findSection(".fini_array"); + + // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop + // symbols for sections, so that the runtime can get the start and end + // addresses of each section by section name. Add such symbols. + if (!Config->Relocatable) { + addStartEndSymbols(); + for (OutputSectionBase *Sec : OutputSections) + addStartStopSymbols(Sec); + } + + // Add _DYNAMIC symbol. Unlike GNU gold, our _DYNAMIC symbol has no type. + // It should be okay as no one seems to care about the type. + // Even the author of gold doesn't remember why gold behaves that way. + // https://sourceware.org/ml/binutils/2002-03/msg00360.html + if (In<ELFT>::DynSymTab) + addRegular("_DYNAMIC", In<ELFT>::Dynamic, 0); + + // Define __rel[a]_iplt_{start,end} symbols if needed. 
+ addRelIpltSymbols(); + + if (!Out<ELFT>::EhFrame->empty()) { + OutputSections.push_back(Out<ELFT>::EhFrame); + Out<ELFT>::EhFrame->finalize(); + } + + // Scan relocations. This must be done after every symbol is declared so that + // we can correctly decide if a dynamic relocation is needed. + forEachRelSec(scanRelocations<ELFT>); + + // Now that we have defined all possible symbols including linker- + // synthesized ones. Visit all symbols to give the finishing touches. + for (Symbol *S : Symtab<ELFT>::X->getSymbols()) { + SymbolBody *Body = S->body(); + + if (!includeInSymtab<ELFT>(*Body)) + continue; + if (In<ELFT>::SymTab) + In<ELFT>::SymTab->addSymbol(Body); + + if (In<ELFT>::DynSymTab && S->includeInDynsym()) { + In<ELFT>::DynSymTab->addSymbol(Body); + if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(Body)) + if (SS->file()->isNeeded()) + In<ELFT>::VerNeed->addSymbol(SS); + } + } + + // Do not proceed if there was an undefined symbol. + if (ErrorCount) + return; + + // So far we have added sections from input object files. + // This function adds linker-created Out<ELFT>::* sections. + addPredefinedSections(); + removeUnusedSyntheticSections<ELFT>(OutputSections); + + sortSections(); + + unsigned I = 1; + for (OutputSectionBase *Sec : OutputSections) { + Sec->SectionIndex = I++; + Sec->ShName = In<ELFT>::ShStrTab->addString(Sec->getName()); + } + + // Binary and relocatable output does not have PHDRS. + // The headers have to be created before finalize as that can influence the + // image base and the dynamic section on mips includes the image base. + if (!Config->Relocatable && !Config->OFormatBinary) { + Phdrs = Script<ELFT>::X->hasPhdrsCommands() ? Script<ELFT>::X->createPhdrs() + : createPhdrs(); + addPtArmExid(Phdrs); + fixHeaders(); + } + + // Fill other section headers. The dynamic table is finalized + // at the end because some tags like RELSZ depend on result + // of finalizing other sections. 
+ for (OutputSectionBase *Sec : OutputSections) + Sec->finalize(); + + // Dynamic section must be the last one in this list and dynamic + // symbol table section (DynSymTab) must be the first one. + finalizeSynthetic<ELFT>( + {In<ELFT>::DynSymTab, In<ELFT>::GnuHashTab, In<ELFT>::HashTab, + In<ELFT>::SymTab, In<ELFT>::ShStrTab, In<ELFT>::StrTab, + In<ELFT>::VerDef, In<ELFT>::DynStrTab, In<ELFT>::GdbIndex, + In<ELFT>::Got, In<ELFT>::MipsGot, In<ELFT>::IgotPlt, + In<ELFT>::GotPlt, In<ELFT>::RelaDyn, In<ELFT>::RelaIplt, + In<ELFT>::RelaPlt, In<ELFT>::Plt, In<ELFT>::Iplt, + In<ELFT>::Plt, In<ELFT>::EhFrameHdr, In<ELFT>::VerSym, + In<ELFT>::VerNeed, In<ELFT>::Dynamic}); +} + +template <class ELFT> void Writer<ELFT>::addPredefinedSections() { + if (Out<ELFT>::Bss->Size > 0) + OutputSections.push_back(Out<ELFT>::Bss); + + auto OS = dyn_cast_or_null<OutputSection<ELFT>>(findSection(".ARM.exidx")); + if (OS && !OS->Sections.empty() && !Config->Relocatable) + OS->addSection(make<ARMExidxSentinelSection<ELFT>>()); + + addInputSec(In<ELFT>::SymTab); + addInputSec(In<ELFT>::ShStrTab); + addInputSec(In<ELFT>::StrTab); +} + +// The linker is expected to define SECNAME_start and SECNAME_end +// symbols for a few sections. This function defines them. +template <class ELFT> void Writer<ELFT>::addStartEndSymbols() { + auto Define = [&](StringRef Start, StringRef End, OutputSectionBase *OS) { + // These symbols resolve to the image base if the section does not exist. + // A special value -1 indicates end of the section. + addOptionalSynthetic<ELFT>(Start, OS, 0); + addOptionalSynthetic<ELFT>(End, OS, OS ? 
-1 : 0); + }; + + Define("__preinit_array_start", "__preinit_array_end", + Out<ELFT>::PreinitArray); + Define("__init_array_start", "__init_array_end", Out<ELFT>::InitArray); + Define("__fini_array_start", "__fini_array_end", Out<ELFT>::FiniArray); + + if (OutputSectionBase *Sec = findSection(".ARM.exidx")) + Define("__exidx_start", "__exidx_end", Sec); +} + +// If a section name is valid as a C identifier (which is rare because of +// the leading '.'), linkers are expected to define __start_<secname> and +// __stop_<secname> symbols. They are at beginning and end of the section, +// respectively. This is not requested by the ELF standard, but GNU ld and +// gold provide the feature, and used by many programs. +template <class ELFT> +void Writer<ELFT>::addStartStopSymbols(OutputSectionBase *Sec) { + StringRef S = Sec->getName(); + if (!isValidCIdentifier(S)) + return; + addOptionalSynthetic<ELFT>(Saver.save("__start_" + S), Sec, 0, STV_DEFAULT); + addOptionalSynthetic<ELFT>(Saver.save("__stop_" + S), Sec, -1, STV_DEFAULT); +} + +template <class ELFT> +OutputSectionBase *Writer<ELFT>::findSection(StringRef Name) { + for (OutputSectionBase *Sec : OutputSections) + if (Sec->getName() == Name) + return Sec; + return nullptr; +} + +template <class ELFT> static bool needsPtLoad(OutputSectionBase *Sec) { + if (!(Sec->Flags & SHF_ALLOC)) + return false; + + // Don't allocate VA space for TLS NOBITS sections. The PT_TLS PHDR is + // responsible for allocating space for them, not the PT_LOAD that + // contains the TLS initialization image. + if (Sec->Flags & SHF_TLS && Sec->Type == SHT_NOBITS) + return false; + return true; +} + +// Linker scripts are responsible for aligning addresses. Unfortunately, most +// linker scripts are designed for creating two PT_LOADs only, one RX and one +// RW. This means that there is no alignment in the RO to RX transition and we +// cannot create a PT_LOAD there. 
template <class ELFT>
static typename ELFT::uint computeFlags(typename ELFT::uint F) {
  // With OMagic every segment is readable, writable and executable.
  if (Config->OMagic)
    return PF_R | PF_W | PF_X;
  // With SingleRoRx non-writable segments are also marked executable.
  if (Config->SingleRoRx && !(F & PF_W))
    return F | PF_X;
  return F;
}

// Decide which program headers to create and which sections to include in each
// one.
template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() {
  std::vector<PhdrEntry> Ret;
  // Appends a new header to Ret and returns a pointer to it. The pointer is
  // only valid until the next entry is appended to Ret.
  auto AddHdr = [&](unsigned Type, unsigned Flags) -> PhdrEntry * {
    Ret.emplace_back(Type, Flags);
    return &Ret.back();
  };

  // The first phdr entry is PT_PHDR which describes the program header itself.
  PhdrEntry &Hdr = *AddHdr(PT_PHDR, PF_R);
  Hdr.add(Out<ELFT>::ProgramHeaders);

  // PT_INTERP must be the second entry if exists.
  if (OutputSectionBase *Sec = findSection(".interp")) {
    PhdrEntry &Hdr = *AddHdr(PT_INTERP, Sec->getPhdrFlags());
    Hdr.add(Sec);
  }

  // Add the first PT_LOAD segment for regular output sections.
  uintX_t Flags = computeFlags<ELFT>(PF_R);
  PhdrEntry *Load = AddHdr(PT_LOAD, Flags);

  // These headers are collected on the side and appended to Ret later, and
  // only if they received at least one section.
  PhdrEntry TlsHdr(PT_TLS, PF_R);
  PhdrEntry RelRo(PT_GNU_RELRO, PF_R);
  PhdrEntry Note(PT_NOTE, PF_R);
  for (OutputSectionBase *Sec : OutputSections) {
    // Stop at the first non-SHF_ALLOC section.
    if (!(Sec->Flags & SHF_ALLOC))
      break;

    // If we meet TLS section then we create TLS header
    // and put all TLS sections inside for further use when
    // assign addresses.
    if (Sec->Flags & SHF_TLS)
      TlsHdr.add(Sec);

    if (!needsPtLoad<ELFT>(Sec))
      continue;

    // Segments are contiguous memory regions that have the same attributes
    // (e.g. executable or writable). There is one phdr for each segment.
    // Therefore, we need to create a new phdr when the next section has
    // different flags or is loaded at a discontiguous address using AT linker
    // script command.
    uintX_t NewFlags = computeFlags<ELFT>(Sec->getPhdrFlags());
    if (Script<ELFT>::X->hasLMA(Sec->getName()) || Flags != NewFlags) {
      Load = AddHdr(PT_LOAD, NewFlags);
      Flags = NewFlags;
    }

    Load->add(Sec);

    if (isRelroSection<ELFT>(Sec))
      RelRo.add(Sec);
    if (Sec->Type == SHT_NOTE)
      Note.add(Sec);
  }

  // Add the TLS segment unless it's empty.
  if (TlsHdr.First)
    Ret.push_back(std::move(TlsHdr));

  // Add an entry for .dynamic.
  if (In<ELFT>::DynSymTab) {
    PhdrEntry &H =
        *AddHdr(PT_DYNAMIC, In<ELFT>::Dynamic->OutSec->getPhdrFlags());
    H.add(In<ELFT>::Dynamic->OutSec);
  }

  // PT_GNU_RELRO includes all sections that should be marked as
  // read-only by dynamic linker after processing relocations.
  if (RelRo.First)
    Ret.push_back(std::move(RelRo));

  // PT_GNU_EH_FRAME is a special section pointing on .eh_frame_hdr.
  if (!Out<ELFT>::EhFrame->empty() && In<ELFT>::EhFrameHdr) {
    PhdrEntry &Hdr =
        *AddHdr(PT_GNU_EH_FRAME, In<ELFT>::EhFrameHdr->OutSec->getPhdrFlags());
    Hdr.add(In<ELFT>::EhFrameHdr->OutSec);
  }

  // PT_OPENBSD_RANDOMIZE specifies the location and size of a part of the
  // memory image of the program that must be filled with random data before any
  // code in the object is executed.
  if (OutputSectionBase *Sec = findSection(".openbsd.randomdata")) {
    PhdrEntry &Hdr = *AddHdr(PT_OPENBSD_RANDOMIZE, Sec->getPhdrFlags());
    Hdr.add(Sec);
  }

  // PT_GNU_STACK is a special section to tell the loader to make the
  // pages for the stack non-executable.
  if (!Config->ZExecstack) {
    PhdrEntry &Hdr = *AddHdr(PT_GNU_STACK, PF_R | PF_W);
    // A ZStackSize of -1 means no stack size was requested.
    if (Config->ZStackSize != uint64_t(-1))
      Hdr.p_memsz = Config->ZStackSize;
  }

  // PT_OPENBSD_WXNEEDED is a OpenBSD-specific header to mark the executable
  // is expected to perform W^X violations, such as calling mprotect(2) or
  // mmap(2) with PROT_WRITE | PROT_EXEC, which is prohibited by default on
  // OpenBSD.
  if (Config->ZWxneeded)
    AddHdr(PT_OPENBSD_WXNEEDED, PF_X);

  if (Note.First)
    Ret.push_back(std::move(Note));
  return Ret;
}

// On ARM, appends a PT_ARM_EXIDX header covering the first SHT_ARM_EXIDX
// output section, if there is one.
template <class ELFT>
void Writer<ELFT>::addPtArmExid(std::vector<PhdrEntry> &Phdrs) {
  if (Config->EMachine != EM_ARM)
    return;
  auto I = std::find_if(
      OutputSections.begin(), OutputSections.end(),
      [](OutputSectionBase *Sec) { return Sec->Type == SHT_ARM_EXIDX; });
  if (I == OutputSections.end())
    return;

  // PT_ARM_EXIDX is the ARM EHABI equivalent of PT_GNU_EH_FRAME
  PhdrEntry ARMExidx(PT_ARM_EXIDX, PF_R);
  ARMExidx.add(*I);
  Phdrs.push_back(ARMExidx);
}

// The first section of each PT_LOAD and the first section after PT_GNU_RELRO
// have to be page aligned so that the dynamic linker can set the permissions.
template <class ELFT> void Writer<ELFT>::fixSectionAlignments() {
  for (const PhdrEntry &P : Phdrs)
    if (P.p_type == PT_LOAD && P.First)
      P.First->PageAlign = true;

  for (const PhdrEntry &P : Phdrs) {
    if (P.p_type != PT_GNU_RELRO)
      continue;
    // Find the first section after PT_GNU_RELRO. If it is in a PT_LOAD we
    // have to align it to a page.
    auto End = OutputSections.end();
    auto I = std::find(OutputSections.begin(), End, P.Last);
    if (I == End || (I + 1) == End)
      continue;
    OutputSectionBase *Sec = *(I + 1);
    if (needsPtLoad<ELFT>(Sec))
      Sec->PageAlign = true;
  }
}

// Makes the first PT_LOAD start at the ELF header so that the headers are
// part of it. Sections that referred to the old first section of that
// segment are redirected to the ELF header. Does nothing if there is no
// PT_LOAD.
template <class ELFT>
void elf::allocateHeaders(MutableArrayRef<PhdrEntry> Phdrs,
                          ArrayRef<OutputSectionBase *> OutputSections) {
  auto FirstPTLoad =
      std::find_if(Phdrs.begin(), Phdrs.end(),
                   [](const PhdrEntry &E) { return E.p_type == PT_LOAD; });
  if (FirstPTLoad == Phdrs.end())
    return;
  if (FirstPTLoad->First)
    for (OutputSectionBase *Sec : OutputSections)
      if (Sec->FirstInPtLoad == FirstPTLoad->First)
        Sec->FirstInPtLoad = Out<ELFT>::ElfHeader;
  FirstPTLoad->First = Out<ELFT>::ElfHeader;
  // An otherwise empty segment ends at the program header table.
  if (!FirstPTLoad->Last)
    FirstPTLoad->Last = Out<ELFT>::ProgramHeaders;
}

// We should set file offsets and VAs for elf header and program headers
// sections. These are special, we do not include them into output sections
// list, but have them to simplify the code.
template <class ELFT> void Writer<ELFT>::fixHeaders() {
  Out<ELFT>::ProgramHeaders->Size = sizeof(Elf_Phdr) * Phdrs.size();
  // If the script has SECTIONS, assignAddresses will compute the values.
  if (ScriptConfig->HasSections)
    return;

  uintX_t HeaderSize = getHeaderSize<ELFT>();
  // When -T<section> option is specified, lower the base to make room for those
  // sections.
  if (!Config->SectionStartMap.empty()) {
    // Min is the lowest requested section start address.
    uint64_t Min = -1;
    for (const auto &P : Config->SectionStartMap)
      Min = std::min(Min, P.second);
    // If the headers fit below Min, reserve room for them; otherwise do not
    // allocate the headers at all.
    if (HeaderSize < Min)
      Min -= HeaderSize;
    else
      AllocateHeader = false;
    if (Min < Config->ImageBase)
      Config->ImageBase = alignDown(Min, Config->MaxPageSize);
  }

  if (AllocateHeader)
    allocateHeaders<ELFT>(Phdrs, OutputSections);

  // The ELF header sits at the image base, immediately followed by the
  // program header table.
  uintX_t BaseVA = Config->ImageBase;
  Out<ELFT>::ElfHeader->Addr = BaseVA;
  Out<ELFT>::ProgramHeaders->Addr = BaseVA + Out<ELFT>::ElfHeader->Size;
}

// Assign VAs (addresses at run-time) to output sections.
template <class ELFT> void Writer<ELFT>::assignAddresses() {
  uintX_t VA = Config->ImageBase;
  if (AllocateHeader)
    VA += getHeaderSize<ELFT>();
  uintX_t ThreadBssOffset = 0;
  for (OutputSectionBase *Sec : OutputSections) {
    uintX_t Alignment = Sec->Addralign;
    if (Sec->PageAlign)
      Alignment = std::max<uintX_t>(Alignment, Config->MaxPageSize);

    // An explicit start address for this section overrides the running VA.
    auto I = Config->SectionStartMap.find(Sec->getName());
    if (I != Config->SectionStartMap.end())
      VA = I->second;

    // We only assign VAs to allocated sections.
    if (needsPtLoad<ELFT>(Sec)) {
      VA = alignTo(VA, Alignment);
      Sec->Addr = VA;
      VA += Sec->Size;
    } else if (Sec->Flags & SHF_TLS && Sec->Type == SHT_NOBITS) {
      // TLS NOBITS sections get addresses past the current VA without
      // advancing it; ThreadBssOffset tracks how far past.
      uintX_t TVA = VA + ThreadBssOffset;
      TVA = alignTo(TVA, Alignment);
      Sec->Addr = TVA;
      ThreadBssOffset = TVA - VA + Sec->Size;
    }
  }
}

// Adjusts the file alignment for a given output section and returns
// its new file offset. The file offset must be the same with its
// virtual address (modulo the page size) so that the loader can load
// executables without any address adjustment.
template <class ELFT, class uintX_t>
static uintX_t getFileAlignment(uintX_t Off, OutputSectionBase *Sec) {
  OutputSectionBase *First = Sec->FirstInPtLoad;
  // If the section is not in a PT_LOAD, we just have to align it.
  if (!First)
    return alignTo(Off, Sec->Addralign);

  // The first section in a PT_LOAD has to have congruent offset and address
  // modulo the page size.
  if (Sec == First)
    return alignTo(Off, Config->MaxPageSize, Sec->Addr);

  // If two sections share the same PT_LOAD the file offset is calculated
  // using this formula: Off2 = Off1 + (VA2 - VA1).
  return First->Offset + Sec->Addr - First->Addr;
}

// Assigns a file offset to Sec and advances Off past it. SHT_NOBITS sections
// get the current offset but do not advance it.
template <class ELFT, class uintX_t>
void setOffset(OutputSectionBase *Sec, uintX_t &Off) {
  if (Sec->Type == SHT_NOBITS) {
    Sec->Offset = Off;
    return;
  }

  Off = getFileAlignment<ELFT>(Off, Sec);
  Sec->Offset = Off;
  Off += Sec->Size;
}

// Assign file offsets for binary output: only SHF_ALLOC sections are laid
// out, starting at offset 0 with no headers.
template <class ELFT> void Writer<ELFT>::assignFileOffsetsBinary() {
  uintX_t Off = 0;
  for (OutputSectionBase *Sec : OutputSections)
    if (Sec->Flags & SHF_ALLOC)
      setOffset<ELFT>(Sec, Off);
  FileSize = alignTo(Off, sizeof(uintX_t));
}

// Assign file offsets to output sections.
template <class ELFT> void Writer<ELFT>::assignFileOffsets() {
  uintX_t Off = 0;
  setOffset<ELFT>(Out<ELFT>::ElfHeader, Off);
  setOffset<ELFT>(Out<ELFT>::ProgramHeaders, Off);

  for (OutputSectionBase *Sec : OutputSections)
    setOffset<ELFT>(Sec, Off);

  // The section header table follows the last section; it has one entry per
  // output section plus one extra entry.
  SectionHeaderOff = alignTo(Off, sizeof(uintX_t));
  FileSize = SectionHeaderOff + (OutputSections.size() + 1) * sizeof(Elf_Shdr);
}

// Finalize the program headers. We call this function after we assign
// file offsets and VAs to all sections.
+template <class ELFT> void Writer<ELFT>::setPhdrs() { + for (PhdrEntry &P : Phdrs) { + OutputSectionBase *First = P.First; + OutputSectionBase *Last = P.Last; + if (First) { + P.p_filesz = Last->Offset - First->Offset; + if (Last->Type != SHT_NOBITS) + P.p_filesz += Last->Size; + P.p_memsz = Last->Addr + Last->Size - First->Addr; + P.p_offset = First->Offset; + P.p_vaddr = First->Addr; + if (!P.HasLMA) + P.p_paddr = First->getLMA(); + } + if (P.p_type == PT_LOAD) + P.p_align = Config->MaxPageSize; + else if (P.p_type == PT_GNU_RELRO) { + P.p_align = 1; + // The glibc dynamic loader rounds the size down, so we need to round up + // to protect the last page. This is a no-op on FreeBSD which always + // rounds up. + P.p_memsz = alignTo(P.p_memsz, Config->MaxPageSize); + } + + // The TLS pointer goes after PT_TLS. At least glibc will align it, + // so round up the size to make sure the offsets are correct. + if (P.p_type == PT_TLS) { + Out<ELFT>::TlsPhdr = &P; + if (P.p_memsz) + P.p_memsz = alignTo(P.p_memsz, P.p_align); + } + } +} + +// The entry point address is chosen in the following ways. +// +// 1. the '-e' entry command-line option; +// 2. the ENTRY(symbol) command in a linker control script; +// 3. the value of the symbol start, if present; +// 4. the address of the first byte of the .text section, if present; +// 5. the address 0. +template <class ELFT> typename ELFT::uint Writer<ELFT>::getEntryAddr() { + // Case 1, 2 or 3. As a special case, if the symbol is actually + // a number, we'll use that number as an address. 
+ if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Entry)) + return B->getVA<ELFT>(); + uint64_t Addr; + if (!Config->Entry.getAsInteger(0, Addr)) + return Addr; + + // Case 4 + if (OutputSectionBase *Sec = findSection(".text")) { + if (Config->WarnMissingEntry) + warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" + + utohexstr(Sec->Addr)); + return Sec->Addr; + } + + // Case 5 + if (Config->WarnMissingEntry) + warn("cannot find entry symbol " + Config->Entry + + "; not setting start address"); + return 0; +} + +template <class ELFT> static uint8_t getELFEncoding() { + if (ELFT::TargetEndianness == llvm::support::little) + return ELFDATA2LSB; + return ELFDATA2MSB; +} + +static uint16_t getELFType() { + if (Config->Pic) + return ET_DYN; + if (Config->Relocatable) + return ET_REL; + return ET_EXEC; +} + +// This function is called after we have assigned address and size +// to each section. This function fixes some predefined absolute +// symbol values that depend on section address and size. +template <class ELFT> void Writer<ELFT>::fixAbsoluteSymbols() { + // __ehdr_start is the location of program headers. + if (ElfSym<ELFT>::EhdrStart) + ElfSym<ELFT>::EhdrStart->Value = Out<ELFT>::ProgramHeaders->Addr; + + auto Set = [](DefinedRegular<ELFT> *S1, DefinedRegular<ELFT> *S2, uintX_t V) { + if (S1) + S1->Value = V; + if (S2) + S2->Value = V; + }; + + // _etext is the first location after the last read-only loadable segment. + // _edata is the first location after the last read-write loadable segment. + // _end is the first location after the uninitialized data region. 
+ for (PhdrEntry &P : Phdrs) { + if (P.p_type != PT_LOAD) + continue; + Set(ElfSym<ELFT>::End, ElfSym<ELFT>::End2, P.p_vaddr + P.p_memsz); + + uintX_t Val = P.p_vaddr + P.p_filesz; + if (P.p_flags & PF_W) + Set(ElfSym<ELFT>::Edata, ElfSym<ELFT>::Edata2, Val); + else + Set(ElfSym<ELFT>::Etext, ElfSym<ELFT>::Etext2, Val); + } + + // Setup MIPS _gp_disp/__gnu_local_gp symbols which should + // be equal to the _gp symbol's value. + if (Config->EMachine == EM_MIPS) { + if (!ElfSym<ELFT>::MipsGp->Value) { + // Find GP-relative section with the lowest address + // and use this address to calculate default _gp value. + uintX_t Gp = -1; + for (const OutputSectionBase * OS : OutputSections) + if ((OS->Flags & SHF_MIPS_GPREL) && OS->Addr < Gp) + Gp = OS->Addr; + if (Gp != (uintX_t)-1) + ElfSym<ELFT>::MipsGp->Value = Gp + 0x7ff0; + } + if (ElfSym<ELFT>::MipsGpDisp) + ElfSym<ELFT>::MipsGpDisp->Value = ElfSym<ELFT>::MipsGp->Value; + if (ElfSym<ELFT>::MipsLocalGp) + ElfSym<ELFT>::MipsLocalGp->Value = ElfSym<ELFT>::MipsGp->Value; + } +} + +template <class ELFT> void Writer<ELFT>::writeHeader() { + uint8_t *Buf = Buffer->getBufferStart(); + memcpy(Buf, "\177ELF", 4); + + // Write the ELF header. + auto *EHdr = reinterpret_cast<Elf_Ehdr *>(Buf); + EHdr->e_ident[EI_CLASS] = ELFT::Is64Bits ? 
ELFCLASS64 : ELFCLASS32; + EHdr->e_ident[EI_DATA] = getELFEncoding<ELFT>(); + EHdr->e_ident[EI_VERSION] = EV_CURRENT; + EHdr->e_ident[EI_OSABI] = Config->OSABI; + EHdr->e_type = getELFType(); + EHdr->e_machine = Config->EMachine; + EHdr->e_version = EV_CURRENT; + EHdr->e_entry = getEntryAddr(); + EHdr->e_shoff = SectionHeaderOff; + EHdr->e_ehsize = sizeof(Elf_Ehdr); + EHdr->e_phnum = Phdrs.size(); + EHdr->e_shentsize = sizeof(Elf_Shdr); + EHdr->e_shnum = OutputSections.size() + 1; + EHdr->e_shstrndx = In<ELFT>::ShStrTab->OutSec->SectionIndex; + + if (Config->EMachine == EM_ARM) + // We don't currently use any features incompatible with EF_ARM_EABI_VER5, + // but we don't have any firm guarantees of conformance. Linux AArch64 + // kernels (as of 2016) require an EABI version to be set. + EHdr->e_flags = EF_ARM_EABI_VER5; + else if (Config->EMachine == EM_MIPS) + EHdr->e_flags = getMipsEFlags<ELFT>(); + + if (!Config->Relocatable) { + EHdr->e_phoff = sizeof(Elf_Ehdr); + EHdr->e_phentsize = sizeof(Elf_Phdr); + } + + // Write the program header table. + auto *HBuf = reinterpret_cast<Elf_Phdr *>(Buf + EHdr->e_phoff); + for (PhdrEntry &P : Phdrs) { + HBuf->p_type = P.p_type; + HBuf->p_flags = P.p_flags; + HBuf->p_offset = P.p_offset; + HBuf->p_vaddr = P.p_vaddr; + HBuf->p_paddr = P.p_paddr; + HBuf->p_filesz = P.p_filesz; + HBuf->p_memsz = P.p_memsz; + HBuf->p_align = P.p_align; + ++HBuf; + } + + // Write the section header table. Note that the first table entry is null. + auto *SHdrs = reinterpret_cast<Elf_Shdr *>(Buf + EHdr->e_shoff); + for (OutputSectionBase *Sec : OutputSections) + Sec->writeHeaderTo<ELFT>(++SHdrs); +} + +// Removes a given file asynchronously. This is a performance hack, +// so remove this when operating systems are improved. +// +// On Linux (and probably on other Unix-like systems), unlink(2) is a +// noticeably slow system call. As of 2016, unlink takes 250 +// milliseconds to remove a 1 GB file on ext4 filesystem on my machine. 
+// +// To create a new result file, we first remove existing file. So, if +// you repeatedly link a 1 GB program in a regular compile-link-debug +// cycle, every cycle wastes 250 milliseconds only to remove a file. +// Since LLD can link a 1 GB binary in about 5 seconds, that waste +// actually counts. +// +// This function spawns a background thread to call unlink. +// The calling thread returns almost immediately. +static void unlinkAsync(StringRef Path) { + if (!Config->Threads || !sys::fs::exists(Config->OutputFile)) + return; + + // First, rename Path to avoid race condition. We cannot remove + // Path from a different thread because we are now going to create + // Path as a new file. If we do that in a different thread, the new + // thread can remove the new file. + SmallString<128> TempPath; + if (auto EC = sys::fs::createUniqueFile(Path + "tmp%%%%%%%%", TempPath)) + fatal(EC, "createUniqueFile failed"); + if (auto EC = sys::fs::rename(Path, TempPath)) + fatal(EC, "rename failed"); + + // Remove TempPath in background. + std::thread([=] { ::remove(TempPath.str().str().c_str()); }).detach(); +} + +// Open a result file. +template <class ELFT> void Writer<ELFT>::openFile() { + unlinkAsync(Config->OutputFile); + ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Config->OutputFile, FileSize, + FileOutputBuffer::F_executable); + + if (auto EC = BufferOrErr.getError()) + error(EC, "failed to open " + Config->OutputFile); + else + Buffer = std::move(*BufferOrErr); +} + +template <class ELFT> void Writer<ELFT>::writeSectionsBinary() { + uint8_t *Buf = Buffer->getBufferStart(); + for (OutputSectionBase *Sec : OutputSections) + if (Sec->Flags & SHF_ALLOC) + Sec->writeTo(Buf + Sec->Offset); +} + +// Write section contents to a mmap'ed file. 
+template <class ELFT> void Writer<ELFT>::writeSections() { + uint8_t *Buf = Buffer->getBufferStart(); + + // PPC64 needs to process relocations in the .opd section + // before processing relocations in code-containing sections. + Out<ELFT>::Opd = findSection(".opd"); + if (Out<ELFT>::Opd) { + Out<ELFT>::OpdBuf = Buf + Out<ELFT>::Opd->Offset; + Out<ELFT>::Opd->writeTo(Buf + Out<ELFT>::Opd->Offset); + } + + OutputSectionBase *EhFrameHdr = + In<ELFT>::EhFrameHdr ? In<ELFT>::EhFrameHdr->OutSec : nullptr; + for (OutputSectionBase *Sec : OutputSections) + if (Sec != Out<ELFT>::Opd && Sec != EhFrameHdr) + Sec->writeTo(Buf + Sec->Offset); + + // The .eh_frame_hdr depends on .eh_frame section contents, therefore + // it should be written after .eh_frame is written. + if (!Out<ELFT>::EhFrame->empty() && EhFrameHdr) + EhFrameHdr->writeTo(Buf + EhFrameHdr->Offset); +} + +template <class ELFT> void Writer<ELFT>::writeBuildId() { + if (!In<ELFT>::BuildId || !In<ELFT>::BuildId->OutSec) + return; + + // Compute a hash of all sections of the output file. 
+ uint8_t *Start = Buffer->getBufferStart(); + uint8_t *End = Start + FileSize; + In<ELFT>::BuildId->writeBuildId({Start, End}); +} + +template void elf::writeResult<ELF32LE>(); +template void elf::writeResult<ELF32BE>(); +template void elf::writeResult<ELF64LE>(); +template void elf::writeResult<ELF64BE>(); + +template void elf::allocateHeaders<ELF32LE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); +template void elf::allocateHeaders<ELF32BE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); +template void elf::allocateHeaders<ELF64LE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); +template void elf::allocateHeaders<ELF64BE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); + +template bool elf::isRelroSection<ELF32LE>(const OutputSectionBase *); +template bool elf::isRelroSection<ELF32BE>(const OutputSectionBase *); +template bool elf::isRelroSection<ELF64LE>(const OutputSectionBase *); +template bool elf::isRelroSection<ELF64BE>(const OutputSectionBase *); + +template void elf::reportDiscarded<ELF32LE>(InputSectionBase<ELF32LE> *); +template void elf::reportDiscarded<ELF32BE>(InputSectionBase<ELF32BE> *); +template void elf::reportDiscarded<ELF64LE>(InputSectionBase<ELF64LE> *); +template void elf::reportDiscarded<ELF64BE>(InputSectionBase<ELF64BE> *); diff --git a/contrib/llvm/tools/lld/ELF/Writer.h b/contrib/llvm/tools/lld/ELF/Writer.h new file mode 100644 index 000000000000..718e3139a809 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Writer.h @@ -0,0 +1,66 @@ +//===- Writer.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_WRITER_H +#define LLD_ELF_WRITER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include <cstdint> +#include <memory> + +namespace lld { +namespace elf { +class InputFile; +class OutputSectionBase; +template <class ELFT> class InputSectionBase; +template <class ELFT> class ObjectFile; +template <class ELFT> class SymbolTable; +template <class ELFT> void writeResult(); +template <class ELFT> void markLive(); +template <class ELFT> bool isRelroSection(const OutputSectionBase *Sec); + +// This describes a program header entry. +// Each contains type, access flags and range of output sections that will be +// placed in it. +struct PhdrEntry { + PhdrEntry(unsigned Type, unsigned Flags); + void add(OutputSectionBase *Sec); + + uint64_t p_paddr = 0; + uint64_t p_vaddr = 0; + uint64_t p_align = 0; + uint64_t p_memsz = 0; + uint64_t p_filesz = 0; + uint64_t p_offset = 0; + uint32_t p_type = 0; + uint32_t p_flags = 0; + + OutputSectionBase *First = nullptr; + OutputSectionBase *Last = nullptr; + bool HasLMA = false; +}; + +llvm::StringRef getOutputSectionName(llvm::StringRef Name); + +template <class ELFT> +void allocateHeaders(llvm::MutableArrayRef<PhdrEntry>, + llvm::ArrayRef<OutputSectionBase *>); +template <class ELFT> void reportDiscarded(InputSectionBase<ELFT> *IS); + +template <class ELFT> uint32_t getMipsEFlags(); + +uint8_t getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag, + llvm::StringRef FileName); + +bool isMipsN32Abi(const InputFile *F); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/FREEBSD-Xlist b/contrib/llvm/tools/lld/FREEBSD-Xlist new file mode 100644 index 000000000000..a56ac6bc2505 --- /dev/null +++ b/contrib/llvm/tools/lld/FREEBSD-Xlist @@ -0,0 +1,5 @@ +# $FreeBSD$ +cmake/ +docs/ +test/ +unittests/ diff --git a/contrib/llvm/tools/lld/LICENSE.TXT b/contrib/llvm/tools/lld/LICENSE.TXT new file mode 100644 index 
000000000000..ec97986c86ba --- /dev/null +++ b/contrib/llvm/tools/lld/LICENSE.TXT @@ -0,0 +1,62 @@ +============================================================================== +lld License +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2011-2016 by the contributors listed in CREDITS.TXT +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== +The lld software contains code written by third parties. Such software will +have its own individual LICENSE.TXT file in the directory in which it appears. +This file will describe the copyrights, license, and restrictions which apply +to that code. + +The disclaimer of warranty in the University of Illinois Open Source License +applies to all code in the lld Distribution, and nothing in any of the +other licenses gives permission to use the names of the LLVM Team or the +University of Illinois to endorse or promote products derived from this +Software. + +The following pieces of software have additional or alternate copyrights, +licenses, and/or restrictions: + +Program Directory +------- --------- +<none yet> diff --git a/contrib/llvm/tools/lld/README.md b/contrib/llvm/tools/lld/README.md new file mode 100644 index 000000000000..dc05cdea0a12 --- /dev/null +++ b/contrib/llvm/tools/lld/README.md @@ -0,0 +1,10 @@ + +LLVM Linker (lld) +============================== + +This directory and its subdirectories contain source code for the LLVM Linker, a +modular cross platform linker which is built as part of the LLVM compiler +infrastructure project. + +lld is open source software. You may freely distribute it under the terms of +the license agreement found in LICENSE.txt. 
diff --git a/contrib/llvm/tools/lld/include/lld/Config/Version.h b/contrib/llvm/tools/lld/include/lld/Config/Version.h new file mode 100644 index 000000000000..1cec3cc7678c --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Config/Version.h @@ -0,0 +1,25 @@ +//===- lld/Config/Version.h - LLD Version Number ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines a version-related utility function. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_VERSION_H +#define LLD_VERSION_H + +#include "lld/Config/Version.inc" +#include "llvm/ADT/StringRef.h" + +namespace lld { +/// \brief Retrieves a string representing the complete lld version. +std::string getLLDVersion(); +} + +#endif // LLD_VERSION_H diff --git a/contrib/llvm/tools/lld/include/lld/Config/Version.inc.in b/contrib/llvm/tools/lld/include/lld/Config/Version.inc.in new file mode 100644 index 000000000000..2789a5c46089 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Config/Version.inc.in @@ -0,0 +1,6 @@ +#define LLD_VERSION @LLD_VERSION@ +#define LLD_VERSION_STRING "@LLD_VERSION@" +#define LLD_VERSION_MAJOR @LLD_VERSION_MAJOR@ +#define LLD_VERSION_MINOR @LLD_VERSION_MINOR@ +#define LLD_REVISION_STRING "@LLD_REVISION@" +#define LLD_REPOSITORY_STRING "@LLD_REPOSITORY@" diff --git a/contrib/llvm/tools/lld/include/lld/Core/AbsoluteAtom.h b/contrib/llvm/tools/lld/include/lld/Core/AbsoluteAtom.h new file mode 100644 index 000000000000..ed25297cea81 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/AbsoluteAtom.h @@ -0,0 +1,43 @@ +//===- Core/AbsoluteAtom.h - An absolute Atom -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ABSOLUTE_ATOM_H +#define LLD_CORE_ABSOLUTE_ATOM_H + +#include "lld/Core/Atom.h" + +namespace lld { + +/// An AbsoluteAtom has no content. +/// It exists to represent content at fixed addresses in memory. +class AbsoluteAtom : public Atom { +public: + + virtual uint64_t value() const = 0; + + /// scope - The visibility of this atom to other atoms. C static functions + /// have scope scopeTranslationUnit. Regular C functions have scope + /// scopeGlobal. Functions compiled with visibility=hidden have scope + /// scopeLinkageUnit so they can be seen by other atoms being linked but not + /// by the OS loader. + virtual Scope scope() const = 0; + + static bool classof(const Atom *a) { + return a->definition() == definitionAbsolute; + } + + static bool classof(const AbsoluteAtom *) { return true; } + +protected: + AbsoluteAtom() : Atom(definitionAbsolute) {} +}; + +} // namespace lld + +#endif // LLD_CORE_ABSOLUTE_ATOM_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/ArchiveLibraryFile.h b/contrib/llvm/tools/lld/include/lld/Core/ArchiveLibraryFile.h new file mode 100644 index 000000000000..2c736e7d6c61 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/ArchiveLibraryFile.h @@ -0,0 +1,47 @@ +//===- Core/ArchiveLibraryFile.h - Models static library ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ARCHIVE_LIBRARY_FILE_H +#define LLD_CORE_ARCHIVE_LIBRARY_FILE_H + +#include "lld/Core/File.h" +#include <set> + +namespace lld { + +/// +/// The ArchiveLibraryFile subclass of File is used to represent unix +/// static library archives. 
These libraries provide no atoms to the +/// initial set of atoms linked. Instead, the Resolver will query +/// ArchiveLibraryFile instances for specific symbol names using the +/// find() method. If the archive contains an object file which has a +/// DefinedAtom whose scope is not translationUnit, then that entire +/// object file File is returned. +/// +class ArchiveLibraryFile : public File { +public: + static bool classof(const File *f) { + return f->kind() == kindArchiveLibrary; + } + + /// Check if any member of the archive contains an Atom with the + /// specified name and return the File object for that member, or nullptr. + virtual File *find(StringRef name) = 0; + + virtual std::error_code + parseAllMembers(std::vector<std::unique_ptr<File>> &result) = 0; + +protected: + /// only subclasses of ArchiveLibraryFile can be instantiated + ArchiveLibraryFile(StringRef path) : File(path, kindArchiveLibrary) {} +}; + +} // namespace lld + +#endif // LLD_CORE_ARCHIVE_LIBRARY_FILE_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Atom.h b/contrib/llvm/tools/lld/include/lld/Core/Atom.h new file mode 100644 index 000000000000..156a5d4a736f --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Atom.h @@ -0,0 +1,131 @@ +//===- Core/Atom.h - A node in linking graph --------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ATOM_H +#define LLD_CORE_ATOM_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" + +namespace lld { + +class File; + +template<typename T> +class OwningAtomPtr; + +/// +/// The linker has a Graph Theory model of linking. An object file is seen +/// as a set of Atoms with References to other Atoms. Each Atom is a node +/// and each Reference is an edge. 
An Atom can be a DefinedAtom which has +/// content or an UndefinedAtom which is a placeholder and represents an +/// undefined symbol (extern declaration). +/// +class Atom { + template<typename T> friend class OwningAtomPtr; + +public: + /// Whether this atom is defined or a proxy for an undefined symbol + enum Definition { + definitionRegular, ///< Normal C/C++ function or global variable. + definitionAbsolute, ///< Asm-only (foo = 10). Not tied to any content. + definitionUndefined, ///< Only in .o files to model reference to undef. + definitionSharedLibrary ///< Only in shared libraries to model export. + }; + + /// The scope in which this atom is accessible to other atoms. + enum Scope { + scopeTranslationUnit, ///< Accessible only to atoms in the same translation + /// unit (e.g. a C static). + scopeLinkageUnit, ///< Accessible to atoms being linked but not visible + /// to runtime loader (e.g. visibility=hidden). + scopeGlobal ///< Accessible to all atoms and visible to runtime + /// loader (e.g. visibility=default). + }; + + /// file - returns the File that produced/owns this Atom + virtual const File& file() const = 0; + + /// name - The name of the atom. For a function atom, it is the (mangled) + /// name of the function. + virtual StringRef name() const = 0; + + /// definition - Whether this atom is a definition or represents an undefined + /// symbol. + Definition definition() const { return _definition; } + + static bool classof(const Atom *a) { return true; } + +protected: + /// Atom is an abstract base class. Only subclasses can access constructor. + explicit Atom(Definition def) : _definition(def) {} + + /// The memory for Atom objects is always managed by the owning File + /// object. Therefore, no one but the owning File object should call + /// delete on an Atom. In fact, some File objects may bulk allocate + /// an array of Atoms, so they cannot be individually deleted by anyone. 
+ virtual ~Atom() = default; + +private: + Definition _definition; +}; + +/// Class which owns an atom pointer and runs the atom destructor when the +/// owning pointer goes out of scope. +template<typename T> +class OwningAtomPtr { +private: + OwningAtomPtr(const OwningAtomPtr &) = delete; + void operator=(const OwningAtomPtr &) = delete; + +public: + OwningAtomPtr() = default; + OwningAtomPtr(T *atom) : atom(atom) { } + + ~OwningAtomPtr() { + if (atom) + runDestructor(atom); + } + + void runDestructor(Atom *atom) { + atom->~Atom(); + } + + OwningAtomPtr(OwningAtomPtr &&ptr) : atom(ptr.atom) { + ptr.atom = nullptr; + } + + void operator=(OwningAtomPtr&& ptr) { + if (atom) + runDestructor(atom); + atom = ptr.atom; + ptr.atom = nullptr; + } + + T *const &get() const { + return atom; + } + + T *&get() { + return atom; + } + + T *release() { + auto *v = atom; + atom = nullptr; + return v; + } + +private: + T *atom = nullptr; +}; + +} // end namespace lld + +#endif // LLD_CORE_ATOM_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h b/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h new file mode 100644 index 000000000000..7f623d2ea5e6 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h @@ -0,0 +1,374 @@ +//===- Core/DefinedAtom.h - An Atom with content --------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_DEFINED_ATOM_H +#define LLD_CORE_DEFINED_ATOM_H + +#include "lld/Core/Atom.h" +#include "lld/Core/Reference.h" +#include "lld/Core/LLVM.h" +#include "llvm/Support/ErrorHandling.h" + +namespace lld { +class File; + +/// \brief The fundamental unit of linking. +/// +/// A C function or global variable is an atom. An atom has content and +/// attributes. 
The content of a function atom is the instructions that +/// implement the function. The content of a global variable atom is its +/// initial bytes. +/// +/// Here are some example attribute sets for common atoms. If a particular +/// attribute is not listed, the default values are: definition=regular, +/// sectionChoice=basedOnContent, scope=translationUnit, merge=no, +/// deadStrip=normal, interposable=no +/// +/// C function: void foo() {} <br> +/// name=foo, type=code, perm=r_x, scope=global +/// +/// C static function: static void func() {} <br> +/// name=func, type=code, perm=r_x +/// +/// C global variable: int count = 1; <br> +/// name=count, type=data, perm=rw_, scope=global +/// +/// C tentative definition: int bar; <br> +/// name=bar, type=zerofill, perm=rw_, scope=global, +/// merge=asTentative, interposable=yesAndRuntimeWeak +/// +/// Uninitialized C static variable: static int stuff; <br> +/// name=stuff, type=zerofill, perm=rw_ +/// +/// Weak C function: __attribute__((weak)) void foo() {} <br> +/// name=foo, type=code, perm=r_x, scope=global, merge=asWeak +/// +/// Hidden C function: __attribute__((visibility("hidden"))) void foo() {}<br> +/// name=foo, type=code, perm=r_x, scope=linkageUnit +/// +/// No-dead-strip function: __attribute__((used)) void foo() {} <br> +/// name=foo, type=code, perm=r_x, scope=global, deadStrip=never +/// +/// Non-inlined C++ inline method: inline void Foo::doit() {} <br> +/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global, +/// mergeDupes=asWeak +/// +/// Non-inlined C++ inline method whose address is taken: +/// inline void Foo::doit() {} <br> +/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global, +/// mergeDupes=asAddressedWeak +/// +/// literal c-string: "hello" <br> +/// name="" type=cstring, perm=r__, scope=linkageUnit +/// +/// literal double: 1.234 <br> +/// name="" type=literal8, perm=r__, scope=linkageUnit +/// +/// constant: { 1,2,3 } <br> +/// name="" type=constant, perm=r__, scope=linkageUnit 
+/// +/// Pointer to initializer function: <br> +/// name="" type=initializer, perm=rw_l, +/// sectionChoice=customRequired +/// +/// C function place in custom section: __attribute__((section("__foo"))) +/// void foo() {} <br> +/// name=foo, type=code, perm=r_x, scope=global, +/// sectionChoice=customRequired, customSectionName=__foo +/// +class DefinedAtom : public Atom { +public: + enum Interposable { + interposeNo, // linker can directly bind uses of this atom + interposeYes, // linker must indirect (through GOT) uses + interposeYesAndRuntimeWeak // must indirect and mark symbol weak in final + // linked image + }; + + enum Merge { + mergeNo, // Another atom with same name is error + mergeAsTentative, // Is ANSI C tentative definition, can be coalesced + mergeAsWeak, // Is C++ inline definition that was not inlined, + // but address was not taken, so atom can be hidden + // by linker + mergeAsWeakAndAddressUsed, // Is C++ definition inline definition whose + // address was taken. + mergeSameNameAndSize, // Another atom with different size is error + mergeByLargestSection, // Choose an atom whose section is the largest. + mergeByContent, // Merge with other constants with same content. 
+ }; + + enum ContentType { + typeUnknown, // for use with definitionUndefined + typeMachHeader, // atom representing mach_header [Darwin] + typeCode, // executable code + typeResolver, // function which returns address of target + typeBranchIsland, // linker created for large binaries + typeBranchShim, // linker created to switch thumb mode + typeStub, // linker created for calling external function + typeStubHelper, // linker created for initial stub binding + typeConstant, // a read-only constant + typeCString, // a zero terminated UTF8 C string + typeUTF16String, // a zero terminated UTF16 string + typeCFI, // a FDE or CIE from dwarf unwind info + typeLSDA, // extra unwinding info + typeLiteral4, // a four-byte read-only constant + typeLiteral8, // an eight-byte read-only constant + typeLiteral16, // a sixteen-byte read-only constant + typeData, // read-write data + typeDataFast, // allow data to be quickly accessed + typeZeroFill, // zero-fill data + typeZeroFillFast, // allow zero-fill data to be quickly accessed + typeConstData, // read-only data after dynamic linker is done + typeObjC1Class, // ObjC1 class [Darwin] + typeLazyPointer, // pointer through which a stub jumps + typeLazyDylibPointer, // pointer through which a stub jumps [Darwin] + typeNonLazyPointer, // pointer to external symbol + typeCFString, // NS/CFString object [Darwin] + typeGOT, // pointer to external symbol + typeInitializerPtr, // pointer to initializer function + typeTerminatorPtr, // pointer to terminator function + typeCStringPtr, // pointer to UTF8 C string [Darwin] + typeObjCClassPtr, // pointer to ObjC class [Darwin] + typeObjC2CategoryList, // pointers to ObjC category [Darwin] + typeObjCImageInfo, // pointer to ObjC class [Darwin] + typeObjCMethodList, // pointer to ObjC method list [Darwin] + typeDTraceDOF, // runtime data for Dtrace [Darwin] + typeInterposingTuples, // tuples of interposing info for dyld [Darwin] + typeTempLTO, // temporary atom for bitcode reader + 
typeCompactUnwindInfo, // runtime data for unwinder [Darwin] + typeProcessedUnwindInfo,// compressed compact unwind info [Darwin] + typeThunkTLV, // thunk used to access a TLV [Darwin] + typeTLVInitialData, // initial data for a TLV [Darwin] + typeTLVInitialZeroFill, // TLV initial zero fill data [Darwin] + typeTLVInitializerPtr, // pointer to thread local initializer [Darwin] + typeDSOHandle, // atom representing DSO handle [Darwin] + typeSectCreate, // Created via the -sectcreate option [Darwin] + }; + + // Permission bits for atoms and segments. The order of these values are + // important, because the layout pass may sort atoms by permission if other + // attributes are the same. + enum ContentPermissions { + perm___ = 0, // mapped as unaccessible + permR__ = 8, // mapped read-only + permRW_ = 8 + 2, // mapped readable and writable + permRW_L = 8 + 2 + 1, // initially mapped r/w, then made read-only + // loader writable + permR_X = 8 + 4, // mapped readable and executable + permRWX = 8 + 2 + 4, // mapped readable and writable and executable + permUnknown = 16 // unknown or invalid permissions + }; + + enum SectionChoice { + sectionBasedOnContent, // linker infers final section based on content + sectionCustomPreferred, // linker may place in specific section + sectionCustomRequired // linker must place in specific section + }; + + enum DeadStripKind { + deadStripNormal, // linker may dead strip this atom + deadStripNever, // linker must never dead strip this atom + deadStripAlways // linker must remove this atom if unused + }; + + enum DynamicExport { + /// \brief The linker may or may not export this atom dynamically depending + /// on the output type and other context of the link. + dynamicExportNormal, + /// \brief The linker will always export this atom dynamically. + dynamicExportAlways, + }; + + // Attributes describe a code model used by the atom. 
+ enum CodeModel { + codeNA, // no specific code model + // MIPS code models + codeMipsPIC, // PIC function in a PIC / non-PIC mixed file + codeMipsMicro, // microMIPS instruction encoding + codeMipsMicroPIC, // microMIPS instruction encoding + PIC + codeMips16, // MIPS-16 instruction encoding + // ARM code models + codeARMThumb, // ARM Thumb instruction set + codeARM_a, // $a-like mapping symbol (for ARM code) + codeARM_d, // $d-like mapping symbol (for data) + codeARM_t, // $t-like mapping symbol (for Thumb code) + }; + + struct Alignment { + Alignment(int v, int m = 0) : value(v), modulus(m) {} + + uint16_t value; + uint16_t modulus; + + bool operator==(const Alignment &rhs) const { + return (value == rhs.value) && (modulus == rhs.modulus); + } + }; + + /// \brief returns a value for the order of this Atom within its file. + /// + /// This is used by the linker to order the layout of Atoms so that the + /// resulting image is stable and reproducible. + virtual uint64_t ordinal() const = 0; + + /// \brief the number of bytes of space this atom's content will occupy in the + /// final linked image. + /// + /// For a function atom, it is the number of bytes of code in the function. + virtual uint64_t size() const = 0; + + /// \brief The size of the section from which the atom is instantiated. + /// + /// Merge::mergeByLargestSection is defined in terms of section size + /// and not in terms of atom size, so we need this function separate + /// from size(). + virtual uint64_t sectionSize() const { return 0; } + + /// \brief The visibility of this atom to other atoms. + /// + /// C static functions have scope scopeTranslationUnit. Regular C functions + /// have scope scopeGlobal. Functions compiled with visibility=hidden have + /// scope scopeLinkageUnit so they can be see by other atoms being linked but + /// not by the OS loader. + virtual Scope scope() const = 0; + + /// \brief Whether the linker should use direct or indirect access to this + /// atom. 
+ virtual Interposable interposable() const = 0; + + /// \brief how the linker should handle if multiple atoms have the same name. + virtual Merge merge() const = 0; + + /// \brief The type of this atom, such as code or data. + virtual ContentType contentType() const = 0; + + /// \brief The alignment constraints on how this atom must be laid out in the + /// final linked image (e.g. 16-byte aligned). + virtual Alignment alignment() const = 0; + + /// \brief Whether this atom must be in a specially named section in the final + /// linked image, or if the linker can infer the section based on the + /// contentType(). + virtual SectionChoice sectionChoice() const = 0; + + /// \brief If sectionChoice() != sectionBasedOnContent, then this return the + /// name of the section the atom should be placed into. + virtual StringRef customSectionName() const = 0; + + /// \brief constraints on whether the linker may dead strip away this atom. + virtual DeadStripKind deadStrip() const = 0; + + /// \brief Under which conditions should this atom be dynamically exported. + virtual DynamicExport dynamicExport() const { + return dynamicExportNormal; + } + + /// \brief Code model used by the atom. + virtual CodeModel codeModel() const { return codeNA; } + + /// \brief Returns the OS memory protections required for this atom's content + /// at runtime. + /// + /// A function atom is R_X, a global variable is RW_, and a read-only constant + /// is R__. + virtual ContentPermissions permissions() const; + + /// \brief returns a reference to the raw (unrelocated) bytes of this Atom's + /// content. + virtual ArrayRef<uint8_t> rawContent() const = 0; + + /// This class abstracts iterating over the sequence of References + /// in an Atom. Concrete instances of DefinedAtom must implement + /// the derefIterator() and incrementIterator() methods. 
+ class reference_iterator { + public: + reference_iterator(const DefinedAtom &a, const void *it) + : _atom(a), _it(it) { } + + const Reference *operator*() const { + return _atom.derefIterator(_it); + } + + const Reference *operator->() const { + return _atom.derefIterator(_it); + } + + bool operator==(const reference_iterator &other) const { + return _it == other._it; + } + + bool operator!=(const reference_iterator &other) const { + return !(*this == other); + } + + reference_iterator &operator++() { + _atom.incrementIterator(_it); + return *this; + } + private: + const DefinedAtom &_atom; + const void *_it; + }; + + /// \brief Returns an iterator to the beginning of this Atom's References. + virtual reference_iterator begin() const = 0; + + /// \brief Returns an iterator to the end of this Atom's References. + virtual reference_iterator end() const = 0; + + /// Adds a reference to this atom. + virtual void addReference(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue kindValue, uint64_t off, + const Atom *target, Reference::Addend a) { + llvm_unreachable("Subclass does not permit adding references"); + } + + static bool classof(const Atom *a) { + return a->definition() == definitionRegular; + } + + /// Utility for deriving permissions from content type + static ContentPermissions permissions(ContentType type); + + /// Utility function to check if the atom occupies file space + bool occupiesDiskSpace() const { + ContentType atomContentType = contentType(); + return !(atomContentType == DefinedAtom::typeZeroFill || + atomContentType == DefinedAtom::typeZeroFillFast || + atomContentType == DefinedAtom::typeTLVInitialZeroFill); + } + + /// Utility function to check if relocations in this atom to other defined + /// atoms can be implicitly generated, and so we don't need to explicitly + /// emit those relocations. 
+ bool relocsToDefinedCanBeImplicit() const { + ContentType atomContentType = contentType(); + return atomContentType == typeCFI; + } + +protected: + // DefinedAtom is an abstract base class. Only subclasses can access + // constructor. + DefinedAtom() : Atom(definitionRegular) { } + + ~DefinedAtom() override = default; + + /// \brief Returns a pointer to the Reference object that the abstract + /// iterator "points" to. + virtual const Reference *derefIterator(const void *iter) const = 0; + + /// \brief Adjusts the abstract iterator to "point" to the next Reference + /// object for this Atom. + virtual void incrementIterator(const void *&iter) const = 0; +}; +} // end namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/include/lld/Core/Error.h b/contrib/llvm/tools/lld/include/lld/Core/Error.h new file mode 100644 index 000000000000..b0bf73b1cb7b --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Error.h @@ -0,0 +1,68 @@ +//===- Error.h - system_error extensions for lld ----------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares a new error_category for the lld library. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ERROR_H +#define LLD_CORE_ERROR_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include <system_error> + +namespace lld { + +const std::error_category &YamlReaderCategory(); + +enum class YamlReaderError { + unknown_keyword, + illegal_value +}; + +inline std::error_code make_error_code(YamlReaderError e) { + return std::error_code(static_cast<int>(e), YamlReaderCategory()); +} + +/// Creates an error_code object that has associated with it an arbitrary +/// error messsage. 
/// The value() of the error_code will always be non-zero
/// but its value is meaningless. The message() will be (a copy of) the
/// supplied error string.
/// Note: Once ErrorOr<> is updated to work with errors other than error_code,
/// this can be updated to return some other kind of error.
std::error_code make_dynamic_error_code(StringRef msg);

/// Generic error.
///
/// For errors that don't require their own specific sub-error (most errors)
/// this class can be used to describe the error via a string message.
class GenericError : public llvm::ErrorInfo<GenericError> {
public:
  static char ID;
  GenericError(Twine Msg);

  /// Returns the message string held by this error.
  const std::string &getMessage() const { return Msg; }
  void log(llvm::raw_ostream &OS) const override;

  /// Builds a std::error_code from getMessage() via
  /// make_dynamic_error_code().
  std::error_code convertToErrorCode() const override {
    return make_dynamic_error_code(getMessage());
  }

private:
  std::string Msg;
};

} // end namespace lld

// Registers lld::YamlReaderError as an error-code enum with <system_error>.
namespace std {
template <> struct is_error_code_enum<lld::YamlReaderError> : std::true_type {};
}

#endif
diff --git a/contrib/llvm/tools/lld/include/lld/Core/File.h b/contrib/llvm/tools/lld/include/lld/Core/File.h new file mode 100644 index 000000000000..20418688dfa0 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/File.h @@ -0,0 +1,278 @@
//===- Core/File.h - A Container of Atoms ---------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_CORE_FILE_H
#define LLD_CORE_FILE_H

#include "lld/Core/AbsoluteAtom.h"
#include "lld/Core/DefinedAtom.h"
#include "lld/Core/SharedLibraryAtom.h"
#include "lld/Core/UndefinedAtom.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include <functional>
#include <memory>
#include <mutex>
#include <vector>

namespace lld {

class LinkingContext;

/// Every Atom is owned by some File. A common scenario is for a single
/// object file (.o) to be parsed by some reader and produce a single
/// File object that represents the content of that object file.
///
/// To iterate through the Atoms in a File there are four methods that
/// return collections.  For instance to iterate through all the DefinedAtoms
/// in a File object use:
///      for (const DefinedAtom *atom : file->defined()) {
///      }
///
/// The Atom objects in a File are owned by the File object.  The Atom objects
/// are destroyed when the File object is destroyed.
class File {
public:
  virtual ~File();

  /// \brief Kinds of files that are supported.
  enum Kind {
    kindErrorObject,          ///< an error object file (.o)
    kindNormalizedObject,     ///< a normalized file (.o)
    kindMachObject,           ///< a MachO object file (.o)
    kindCEntryObject,         ///< a file for CEntries
    kindHeaderObject,         ///< a file for file headers
    kindEntryObject,          ///< a file for the entry
    kindUndefinedSymsObject,  ///< a file for undefined symbols
    kindStubHelperObject,     ///< a file for stub helpers
    kindResolverMergedObject, ///< the resolver merged file.
    kindSectCreateObject,     ///< a sect create object file (.o)
    kindSharedLibrary,        ///< shared library (.so)
    kindArchiveLibrary        ///< archive (.a)
  };

  /// \brief Returns file kind.  Needed for dyn_cast<> on File objects.
  Kind kind() const {
    return _kind;
  }

  /// This returns the path to the file which was used to create this object
  /// (e.g. "/tmp/foo.o"). If the file is a member of an archive file, the
  /// returned string includes the archive file name.
  ///
  /// The combined "archive(member)" string is built on first use and cached
  /// in _archiveMemberPath.
  /// NOTE(review): setArchivePath() does not reset that cache, so a path()
  /// call made before setArchivePath() keeps returning the earlier string.
  StringRef path() const {
    if (_archivePath.empty())
      return _path;
    if (_archiveMemberPath.empty())
      _archiveMemberPath = (_archivePath + "(" + _path + ")").str();
    return _archiveMemberPath;
  }

  /// Returns the path of the archive file name if this file is instantiated
  /// from an archive file. Otherwise returns the empty string.
  StringRef archivePath() const { return _archivePath; }
  void setArchivePath(StringRef path) { _archivePath = path; }

  /// Returns the path name of this file. It doesn't include archive file name.
  StringRef memberPath() const { return _path; }

  /// Returns the command line order of the file.
  /// Asserts that an ordinal has been set (UINT64_MAX means "unset").
  uint64_t ordinal() const {
    assert(_ordinal != UINT64_MAX);
    return _ordinal;
  }

  /// Returns true/false depending on whether an ordinal has been set.
  bool hasOrdinal() const { return (_ordinal != UINT64_MAX); }

  /// Sets the command line order of the file.
  void setOrdinal(uint64_t ordinal) const { _ordinal = ordinal; }

  /// Returns the ordinal for the next atom to be defined in this file.
  uint64_t getNextAtomOrdinalAndIncrement() const {
    return _nextAtomOrdinal++;
  }

  /// For allocating any objects owned by this File.
  llvm::BumpPtrAllocator &allocator() const {
    return _allocator;
  }

  /// The type of atom mutable container.
  template <typename T> using AtomVector = std::vector<OwningAtomPtr<T>>;

  /// The range type for the atoms.
  ///
  /// Wraps a reference to an AtomVector and exposes iterators that yield the
  /// raw atom pointers (via OwningAtomPtr::get()) instead of the owning
  /// pointers themselves.
  template <typename T> class AtomRange {
  public:
    AtomRange(AtomVector<T> &v) : _v(v) {}
    // Constness of the vector is cast away so one range type can wrap both
    // const and non-const vectors.
    AtomRange(const AtomVector<T> &v) : _v(const_cast<AtomVector<T> &>(v)) {}

    typedef std::pointer_to_unary_function<const OwningAtomPtr<T>&,
                                           const T*> ConstDerefFn;

    typedef std::pointer_to_unary_function<OwningAtomPtr<T>&, T*> DerefFn;

    typedef llvm::mapped_iterator<typename AtomVector<T>::const_iterator,
                                  ConstDerefFn> ConstItTy;
    typedef llvm::mapped_iterator<typename AtomVector<T>::iterator,
                                  DerefFn> ItTy;

    /// Mapping function used by the const iterators.
    static const T* DerefConst(const OwningAtomPtr<T> &p) {
      return p.get();
    }

    /// Mapping function used by the mutable iterators.
    static T* Deref(OwningAtomPtr<T> &p) {
      return p.get();
    }

    ConstItTy begin() const {
      return ConstItTy(_v.begin(), ConstDerefFn(DerefConst));
    }
    ConstItTy end() const {
      return ConstItTy(_v.end(), ConstDerefFn(DerefConst));
    }

    ItTy begin() {
      return ItTy(_v.begin(), DerefFn(Deref));
    }
    ItTy end() {
      return ItTy(_v.end(), DerefFn(Deref));
    }

    /// Returns a range over the underlying OwningAtomPtr elements.
    llvm::iterator_range<typename AtomVector<T>::iterator> owning_ptrs() {
      return llvm::make_range(_v.begin(), _v.end());
    }

    /// Const overload; still yields mutable iterators because _v is held as
    /// a non-const reference.
    llvm::iterator_range<typename AtomVector<T>::iterator> owning_ptrs() const {
      return llvm::make_range(_v.begin(), _v.end());
    }

    bool empty() const {
      return _v.empty();
    }

    size_t size() const {
      return _v.size();
    }

    const OwningAtomPtr<T> &operator[](size_t idx) const {
      return _v[idx];
    }

    OwningAtomPtr<T> &operator[](size_t idx) {
      return _v[idx];
    }

  private:
    AtomVector<T> &_v;
  };

  /// \brief Must be implemented to return the AtomVector object for
  /// all DefinedAtoms in this File.
  virtual const AtomRange<DefinedAtom> defined() const = 0;

  /// \brief Must be implemented to return the AtomVector object for
  /// all UndefinedAtoms in this File.
  virtual const AtomRange<UndefinedAtom> undefined() const = 0;

  /// \brief Must be implemented to return the AtomVector object for
  /// all SharedLibraryAtoms in this File.
  virtual const AtomRange<SharedLibraryAtom> sharedLibrary() const = 0;

  /// \brief Must be implemented to return the AtomVector object for
  /// all AbsoluteAtoms in this File.
  virtual const AtomRange<AbsoluteAtom> absolute() const = 0;

  /// Drop all of the atoms owned by this file.  This will result in all of
  /// the atoms running their destructors.
  /// This is required because atoms may be allocated on a BumpPtrAllocator
  /// of a different file.  We need to destruct all atoms before any files.
  virtual void clearAtoms() = 0;

  /// \brief If a file is parsed using a different method than doParse(),
  /// one must use this method to set the last error status, so that
  /// doParse will not be called twice. Only YAML reader uses this
  /// (because YAML reader does not read blobs but structured data).
  void setLastError(std::error_code err) { _lastError = err; }

  std::error_code parse();

  // Usually each file owns a std::unique_ptr<MemoryBuffer>.
  // However, there's one special case. If a file is an archive file,
  // the archive file and its children all share the same memory buffer.
  // This method is used by the ArchiveFile to give its children
  // co-ownership of the buffer.
  void setSharedMemoryBuffer(std::shared_ptr<MemoryBuffer> mb) {
    _sharedMemoryBuffer = mb;
  }

protected:
  /// \brief only subclasses of File can be instantiated
  File(StringRef p, Kind kind)
    : _path(p), _kind(kind), _ordinal(UINT64_MAX),
      _nextAtomOrdinal(0) {}

  /// \brief Subclasses should override this method to parse the
  /// memory buffer passed to this file's constructor.
  /// The default implementation reports success without doing anything.
  virtual std::error_code doParse() { return std::error_code(); }

  static AtomVector<DefinedAtom> _noDefinedAtoms;
  static AtomVector<UndefinedAtom> _noUndefinedAtoms;
  static AtomVector<SharedLibraryAtom> _noSharedLibraryAtoms;
  static AtomVector<AbsoluteAtom> _noAbsoluteAtoms;
  mutable llvm::BumpPtrAllocator _allocator;

private:
  StringRef _path;
  std::string _archivePath;
  mutable std::string _archiveMemberPath; // lazily built by path()
  Kind _kind;
  mutable uint64_t _ordinal;              // UINT64_MAX until setOrdinal()
  mutable uint64_t _nextAtomOrdinal;
  std::shared_ptr<MemoryBuffer> _sharedMemoryBuffer;
  llvm::Optional<std::error_code> _lastError;
  std::mutex _parseMutex;
};

/// An ErrorFile represents a file that doesn't exist.
/// If you try to parse a file which doesn't exist, an instance of this
/// class will be returned. Its parse method always returns an error.
/// This is useful to delay erroring on non-existent files, so that we
/// can do unit testing a driver using non-existing file paths.
class ErrorFile : public File {
public:
  ErrorFile(StringRef path, std::error_code ec)
      : File(path, kindErrorObject), _ec(ec) {}

  /// Always reports the error code supplied at construction.
  std::error_code doParse() override { return _ec; }

  // The atom accessors below must never be reached on an ErrorFile.
  const AtomRange<DefinedAtom> defined() const override {
    llvm_unreachable("internal error");
  }
  const AtomRange<UndefinedAtom> undefined() const override {
    llvm_unreachable("internal error");
  }
  const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
    llvm_unreachable("internal error");
  }
  const AtomRange<AbsoluteAtom> absolute() const override {
    llvm_unreachable("internal error");
  }

  // Nothing to clear: this class does not store any atoms.
  void clearAtoms() override {
  }

private:
  std::error_code _ec;
};

} // end namespace lld

#endif
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Instrumentation.h b/contrib/llvm/tools/lld/include/lld/Core/Instrumentation.h new file mode 100644 index 000000000000..162375905e17 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Instrumentation.h @@ -0,0 +1,132 @@
//===-
include/Core/Instrumentation.h - Instrumentation API ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Provide an Instrumentation API that optionally uses VTune interfaces. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_INSTRUMENTATION_H +#define LLD_CORE_INSTRUMENTATION_H + +#include "llvm/Support/Compiler.h" +#include <utility> + +#ifdef LLD_HAS_VTUNE +# include <ittnotify.h> +#endif + +namespace lld { +#ifdef LLD_HAS_VTUNE +/// \brief A unique global scope for instrumentation data. +/// +/// Domains last for the lifetime of the application and cannot be destroyed. +/// Multiple Domains created with the same name represent the same domain. +class Domain { + __itt_domain *_domain; + +public: + explicit Domain(const char *name) : _domain(__itt_domain_createA(name)) {} + + operator __itt_domain *() const { return _domain; } + __itt_domain *operator->() const { return _domain; } +}; + +/// \brief A global reference to a string constant. +/// +/// These are uniqued by the ITT runtime and cannot be deleted. They are not +/// specific to a domain. +/// +/// Prefer reusing a single StringHandle over passing a ntbs when the same +/// string will be used often. +class StringHandle { + __itt_string_handle *_handle; + +public: + StringHandle(const char *name) : _handle(__itt_string_handle_createA(name)) {} + + operator __itt_string_handle *() const { return _handle; } +}; + +/// \brief A task on a single thread. Nests within other tasks. +/// +/// Each thread has its own task stack and tasks nest recursively on that stack. +/// A task cannot transfer threads. +/// +/// SBRM is used to ensure task starts and ends are ballanced. 
The lifetime of +/// a task is either the lifetime of this object, or until end is called. +class ScopedTask { + __itt_domain *_domain; + + ScopedTask(const ScopedTask &) = delete; + ScopedTask &operator=(const ScopedTask &) = delete; + +public: + /// \brief Create a task in Domain \p d named \p s. + ScopedTask(const Domain &d, const StringHandle &s) : _domain(d) { + __itt_task_begin(d, __itt_null, __itt_null, s); + } + + ScopedTask(ScopedTask &&other) { + *this = std::move(other); + } + + ScopedTask &operator=(ScopedTask &&other) { + _domain = other._domain; + other._domain = nullptr; + return *this; + } + + /// \brief Prematurely end this task. + void end() { + if (_domain) + __itt_task_end(_domain); + _domain = nullptr; + } + + ~ScopedTask() { end(); } +}; + +/// \brief A specific point in time. Allows metadata to be associated. +class Marker { +public: + Marker(const Domain &d, const StringHandle &s) { + __itt_marker(d, __itt_null, s, __itt_scope_global); + } +}; +#else +class Domain { +public: + Domain(const char *name) {} +}; + +class StringHandle { +public: + StringHandle(const char *name) {} +}; + +class ScopedTask { +public: + ScopedTask(const Domain &d, const StringHandle &s) {} + void end() {} +}; + +class Marker { +public: + Marker(const Domain &d, const StringHandle &s) {} +}; +#endif + +inline const Domain &getDefaultDomain() { + static Domain domain("org.llvm.lld"); + return domain; +} +} // end namespace lld. + +#endif diff --git a/contrib/llvm/tools/lld/include/lld/Core/LLVM.h b/contrib/llvm/tools/lld/include/lld/Core/LLVM.h new file mode 100644 index 000000000000..ccf08859f4ae --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/LLVM.h @@ -0,0 +1,83 @@ +//===--- LLVM.h - Import various common LLVM datatypes ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//
//
// This file forward declares and imports various common LLVM datatypes that
// lld wants to use unqualified.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_CORE_LLVM_H
#define LLD_CORE_LLVM_H

// This should be the only #include, force #includes of all the others on
// clients.
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Casting.h"
#include <utility>

// Forward declarations only: clients that need the complete types must
// include the corresponding LLVM headers themselves.
namespace llvm {
  // ADT's.
  class Error;
  class StringRef;
  class Twine;
  class MemoryBuffer;
  class MemoryBufferRef;
  template<typename T> class ArrayRef;
  template<unsigned InternalLen> class SmallString;
  template<typename T, unsigned N> class SmallVector;
  template<typename T> class SmallVectorImpl;

  template<typename T>
  struct SaveAndRestore;

  template<typename T>
  class ErrorOr;

  template<typename T>
  class Expected;

  class raw_ostream;
  // TODO: DenseMap, ...
}

// Re-export the names above so lld code can use them unqualified.
namespace lld {
  // Casting operators.
  using llvm::isa;
  using llvm::cast;
  using llvm::dyn_cast;
  using llvm::dyn_cast_or_null;
  using llvm::cast_or_null;

  // ADT's.
  using llvm::Error;
  using llvm::StringRef;
  using llvm::Twine;
  using llvm::MemoryBuffer;
  using llvm::MemoryBufferRef;
  using llvm::ArrayRef;
  using llvm::SmallString;
  using llvm::SmallVector;
  using llvm::SmallVectorImpl;
  using llvm::SaveAndRestore;
  using llvm::ErrorOr;
  using llvm::Expected;

  using llvm::raw_ostream;
} // end namespace lld.
+ +namespace std { +template <> struct hash<llvm::StringRef> { +public: + size_t operator()(const llvm::StringRef &s) const { + return llvm::hash_value(s); + } +}; +} + +#endif diff --git a/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h b/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h new file mode 100644 index 000000000000..b3a999b00fbd --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h @@ -0,0 +1,258 @@ +//===- lld/Core/LinkingContext.h - Linker Target Info Interface -*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_LINKING_CONTEXT_H +#define LLD_CORE_LINKING_CONTEXT_H + +#include "lld/Core/Node.h" +#include "lld/Core/Reader.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <memory> +#include <string> +#include <vector> + +namespace lld { + +class PassManager; +class File; +class Writer; +class Node; +class SharedLibraryFile; + +/// \brief The LinkingContext class encapsulates "what and how" to link. +/// +/// The base class LinkingContext contains the options needed by core linking. +/// Subclasses of LinkingContext have additional options needed by specific +/// Writers. +class LinkingContext { +public: + virtual ~LinkingContext(); + + /// \name Methods needed by core linking + /// @{ + + /// Name of symbol linker should use as "entry point" to program, + /// usually "main" or "start". 
+ virtual StringRef entrySymbolName() const { return _entrySymbolName; } + + /// Whether core linking should remove Atoms not reachable by following + /// References from the entry point Atom or from all global scope Atoms + /// if globalsAreDeadStripRoots() is true. + bool deadStrip() const { return _deadStrip; } + + /// Only used if deadStrip() returns true. Means all global scope Atoms + /// should be marked live (along with all Atoms they reference). Usually + /// this method returns false for main executables, but true for dynamic + /// shared libraries. + bool globalsAreDeadStripRoots() const { return _globalsAreDeadStripRoots; } + + /// Only used if deadStrip() returns true. This method returns the names + /// of DefinedAtoms that should be marked live (along with all Atoms they + /// reference). Only Atoms with scope scopeLinkageUnit or scopeGlobal can + /// be kept live using this method. + const std::vector<StringRef> &deadStripRoots() const { + return _deadStripRoots; + } + + /// Add the given symbol name to the dead strip root set. Only used if + /// deadStrip() returns true. + void addDeadStripRoot(StringRef symbolName) { + assert(!symbolName.empty() && "Empty symbol cannot be a dead strip root"); + _deadStripRoots.push_back(symbolName); + } + + /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a + /// SharedLibraryAtom for the link to be successful. This method controls + /// whether core linking prints out a list of remaining UndefinedAtoms. + /// + /// \todo This should be a method core linking calls with a list of the + /// UndefinedAtoms so that different drivers can format the error message + /// as needed. + bool printRemainingUndefines() const { return _printRemainingUndefines; } + + /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a + /// SharedLibraryAtom for the link to be successful. This method controls + /// whether core linking considers remaining undefines to be an error. 
+ bool allowRemainingUndefines() const { return _allowRemainingUndefines; } + + /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a + /// SharedLibraryAtom for the link to be successful. This method controls + /// whether core linking considers remaining undefines from the shared library + /// to be an error. + bool allowShlibUndefines() const { return _allowShlibUndefines; } + + /// If true, core linking will write the path to each input file to stdout + /// (i.e. llvm::outs()) as it is used. This is used to implement the -t + /// linker option. + /// + /// \todo This should be a method core linking calls so that drivers can + /// format the line as needed. + bool logInputFiles() const { return _logInputFiles; } + + /// Parts of LLVM use global variables which are bound to command line + /// options (see llvm::cl::Options). This method returns "command line" + /// options which are used to configure LLVM's command line settings. + /// For instance the -debug-only XXX option can be used to dynamically + /// trace different parts of LLVM and lld. + const std::vector<const char *> &llvmOptions() const { return _llvmOptions; } + + /// \name Methods used by Drivers to configure TargetInfo + /// @{ + void setOutputPath(StringRef str) { _outputPath = str; } + + // Set the entry symbol name. You may also need to call addDeadStripRoot() for + // the symbol if your platform supports dead-stripping, so that the symbol + // will not be removed from the output. 
+ void setEntrySymbolName(StringRef name) { + _entrySymbolName = name; + } + + void setDeadStripping(bool enable) { _deadStrip = enable; } + void setGlobalsAreDeadStripRoots(bool v) { _globalsAreDeadStripRoots = v; } + + void setPrintRemainingUndefines(bool print) { + _printRemainingUndefines = print; + } + + void setAllowRemainingUndefines(bool allow) { + _allowRemainingUndefines = allow; + } + + void setAllowShlibUndefines(bool allow) { _allowShlibUndefines = allow; } + void setLogInputFiles(bool log) { _logInputFiles = log; } + + void appendLLVMOption(const char *opt) { _llvmOptions.push_back(opt); } + + std::vector<std::unique_ptr<Node>> &getNodes() { return _nodes; } + const std::vector<std::unique_ptr<Node>> &getNodes() const { return _nodes; } + + /// This method adds undefined symbols specified by the -u option to the to + /// the list of undefined symbols known to the linker. This option essentially + /// forces an undefined symbol to be created. You may also need to call + /// addDeadStripRoot() for the symbol if your platform supports dead + /// stripping, so that the symbol will not be removed from the output. + void addInitialUndefinedSymbol(StringRef symbolName) { + _initialUndefinedSymbols.push_back(symbolName); + } + + /// Iterators for symbols that appear on the command line. + typedef std::vector<StringRef> StringRefVector; + typedef StringRefVector::iterator StringRefVectorIter; + typedef StringRefVector::const_iterator StringRefVectorConstIter; + + /// Create linker internal files containing atoms for the linker to include + /// during link. Flavors can override this function in their LinkingContext + /// to add more internal files. These internal files are positioned before + /// the actual input files. + virtual void createInternalFiles(std::vector<std::unique_ptr<File>> &) const; + + /// Return the list of undefined symbols that are specified in the + /// linker command line, using the -u option. 
+ ArrayRef<StringRef> initialUndefinedSymbols() const { + return _initialUndefinedSymbols; + } + + /// After all set* methods are called, the Driver calls this method + /// to validate that there are no missing options or invalid combinations + /// of options. If there is a problem, a description of the problem + /// is written to the supplied stream. + /// + /// \returns true if there is an error with the current settings. + bool validate(raw_ostream &diagnostics); + + /// Formats symbol name for use in error messages. + virtual std::string demangle(StringRef symbolName) const = 0; + + /// @} + /// \name Methods used by Driver::link() + /// @{ + + /// Returns the file system path to which the linked output should be written. + /// + /// \todo To support in-memory linking, we need an abstraction that allows + /// the linker to write to an in-memory buffer. + StringRef outputPath() const { return _outputPath; } + + /// Accessor for Register object embedded in LinkingContext. + const Registry ®istry() const { return _registry; } + Registry ®istry() { return _registry; } + + /// This method is called by core linking to give the Writer a chance + /// to add file format specific "files" to set of files to be linked. This is + /// how file format specific atoms can be added to the link. + virtual void createImplicitFiles(std::vector<std::unique_ptr<File>> &) = 0; + + /// This method is called by core linking to build the list of Passes to be + /// run on the merged/linked graph of all input files. + virtual void addPasses(PassManager &pm) = 0; + + /// Calls through to the writeFile() method on the specified Writer. + /// + /// \param linkedFile This is the merged/linked graph of all input file Atoms. + virtual llvm::Error writeFile(const File &linkedFile) const; + + /// Return the next ordinal and Increment it. + virtual uint64_t getNextOrdinalAndIncrement() const { return _nextOrdinal++; } + + // This function is called just before the Resolver kicks in. 
+ // Derived classes may use it to change the list of input files. + virtual void finalizeInputFiles() = 0; + + /// Callback invoked for each file the Resolver decides we are going to load. + /// This can be used to update context state based on the file, and emit + /// errors for any differences between the context state and a loaded file. + /// For example, we can error if we try to load a file which is a different + /// arch from that being linked. + virtual llvm::Error handleLoadedFile(File &file) = 0; + + /// @} +protected: + LinkingContext(); // Must be subclassed + + /// Abstract method to lazily instantiate the Writer. + virtual Writer &writer() const = 0; + + /// Method to create an internal file for the entry symbol + virtual std::unique_ptr<File> createEntrySymbolFile() const; + std::unique_ptr<File> createEntrySymbolFile(StringRef filename) const; + + /// Method to create an internal file for an undefined symbol + virtual std::unique_ptr<File> createUndefinedSymbolFile() const; + std::unique_ptr<File> createUndefinedSymbolFile(StringRef filename) const; + + StringRef _outputPath; + StringRef _entrySymbolName; + bool _deadStrip = false; + bool _globalsAreDeadStripRoots = false; + bool _printRemainingUndefines = true; + bool _allowRemainingUndefines = false; + bool _logInputFiles = false; + bool _allowShlibUndefines = false; + std::vector<StringRef> _deadStripRoots; + std::vector<const char *> _llvmOptions; + StringRefVector _initialUndefinedSymbols; + std::vector<std::unique_ptr<Node>> _nodes; + mutable llvm::BumpPtrAllocator _allocator; + mutable uint64_t _nextOrdinal = 0; + Registry _registry; + +private: + /// Validate the subclass bits. Only called by validate. 
+ virtual bool validateImpl(raw_ostream &diagnostics) = 0; +}; + +} // end namespace lld + +#endif // LLD_CORE_LINKING_CONTEXT_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Node.h b/contrib/llvm/tools/lld/include/lld/Core/Node.h new file mode 100644 index 000000000000..c30482409e7a --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Node.h @@ -0,0 +1,75 @@ +//===- lld/Core/Node.h - Input file class -----------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// The classes in this file represents inputs to the linker. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_NODE_H +#define LLD_CORE_NODE_H + +#include "lld/Core/File.h" +#include <algorithm> +#include <memory> + +namespace lld { + +// A Node represents a FileNode or other type of Node. In the latter case, +// the node contains meta information about the input file list. +// Currently only GroupEnd node is defined as a meta node. +class Node { +public: + enum class Kind { File, GroupEnd }; + + explicit Node(Kind type) : _kind(type) {} + virtual ~Node() = default; + + virtual Kind kind() const { return _kind; } + +private: + Kind _kind; +}; + +// This is a marker for --end-group. getSize() returns the number of +// files between the corresponding --start-group and this marker. +class GroupEnd : public Node { +public: + explicit GroupEnd(int size) : Node(Kind::GroupEnd), _size(size) {} + + int getSize() const { return _size; } + + static bool classof(const Node *a) { + return a->kind() == Kind::GroupEnd; + } + +private: + int _size; +}; + +// A container of File. 
+class FileNode : public Node { +public: + explicit FileNode(std::unique_ptr<File> f) + : Node(Node::Kind::File), _file(std::move(f)) {} + + static bool classof(const Node *a) { + return a->kind() == Node::Kind::File; + } + + File *getFile() { return _file.get(); } + +protected: + std::unique_ptr<File> _file; +}; + +} // end namespace lld + +#endif // LLD_CORE_NODE_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Parallel.h b/contrib/llvm/tools/lld/include/lld/Core/Parallel.h new file mode 100644 index 000000000000..f241453a4d39 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Parallel.h @@ -0,0 +1,336 @@ +//===- lld/Core/Parallel.h - Parallel utilities ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_PARALLEL_H +#define LLD_CORE_PARALLEL_H + +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/thread.h" + +#include <algorithm> +#include <atomic> +#include <condition_variable> +#include <mutex> +#include <stack> + +#if defined(_MSC_VER) && LLVM_ENABLE_THREADS +#include <concrt.h> +#include <ppl.h> +#endif + +namespace lld { +/// \brief Allows one or more threads to wait on a potentially unknown number of +/// events. +/// +/// A latch starts at \p count. inc() increments this, and dec() decrements it. +/// All calls to sync() will block while the count is not 0. +/// +/// Calling dec() on a Latch with a count of 0 has undefined behaivor. 
class Latch {
  // Number of outstanding events; every access happens under _condMut.
  uint32_t _count;
  // Mutable so that the const sync() can lock and wait.
  mutable std::mutex _condMut;
  mutable std::condition_variable _cond;

public:
  /// Creates a latch whose counter starts at \p count.
  explicit Latch(uint32_t count = 0) : _count(count) {}

  /// Blocks until the counter is zero, so a Latch is never destroyed while
  /// events are still outstanding.
  ~Latch() { sync(); }

  /// Registers one more outstanding event.
  void inc() {
    std::unique_lock<std::mutex> lock(_condMut);
    ++_count;
  }

  /// Marks one event as finished and wakes every waiter once the counter
  /// reaches zero. The counter must be non-zero on entry.
  void dec() {
    std::unique_lock<std::mutex> lock(_condMut);
    if (--_count == 0)
      _cond.notify_all();
  }

  /// Blocks the caller until the counter is zero.
  void sync() const {
    std::unique_lock<std::mutex> lock(_condMut);
    _cond.wait(lock, [&] {
      return _count == 0;
    });
  }
};

// Classes in this namespace are implementation details of this header.
namespace internal {

/// \brief An abstract class that takes closures and runs them asynchronously.
class Executor {
public:
  virtual ~Executor() = default;

  /// Hands \p func to the executor; when it runs is up to the implementation.
  virtual void add(std::function<void()> func) = 0;
};

#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
/// \brief An Executor for builds without threads: add() runs the closure
/// immediately on the calling thread.
class SyncExecutor : public Executor {
public:
  void add(std::function<void()> func) override {
    func();
  }
};

/// Returns the process-wide executor (a function-local static).
inline Executor *getDefaultExecutor() {
  static SyncExecutor exec;
  return &exec;
}
#elif defined(_MSC_VER)
/// \brief An Executor that runs tasks via ConcRT.
class ConcRTExecutor : public Executor {
  /// Wraps a closure so it can travel through ScheduleTask()'s void* argument.
  struct Taskish {
    Taskish(std::function<void()> task) : _task(task) {}

    std::function<void()> _task;

    static void run(void *p) {
      Taskish *self = static_cast<Taskish *>(p);
      self->_task();
      // The object was created with placement new in add(), so it must be
      // destroyed explicitly; freeing the raw storage alone would leak
      // whatever the std::function owns.
      self->~Taskish();
      concurrency::Free(self);
    }
  };

public:
  void add(std::function<void()> func) override {
    Concurrency::CurrentScheduler::ScheduleTask(Taskish::run,
        new (concurrency::Alloc(sizeof(Taskish))) Taskish(func));
  }
};

/// Returns the process-wide executor (a function-local static).
inline Executor *getDefaultExecutor() {
  static ConcRTExecutor exec;
  return &exec;
}
#else
/// \brief An implementation of an Executor that runs closures on a thread pool
///   in filo order.
class ThreadPoolExecutor : public Executor {
public:
  /// Starts the worker threads.
  ///
  /// One worker (the spawning thread itself) always runs, even when
  /// \p threadCount is 0 -- which std::thread::hardware_concurrency() is
  /// allowed to return. The latch that counts worker exits is therefore sized
  /// to at least 1, so that worker's final dec() never hits a zero latch.
  explicit ThreadPoolExecutor(unsigned threadCount =
                                  std::thread::hardware_concurrency())
      : _stop(false), _done(threadCount == 0 ? 1 : threadCount) {
    // Spawn all but one of the threads in another thread as spawning threads
    // can take a while.
    std::thread([&, threadCount] {
      for (size_t i = 1; i < threadCount; ++i) {
        std::thread([=] {
          work();
        }).detach();
      }
      work();
    }).detach();
  }

  /// Asks every worker to stop. The wait for them to leave work() happens in
  /// ~Latch when the _done member is destroyed.
  ~ThreadPoolExecutor() override {
    std::unique_lock<std::mutex> lock(_mutex);
    _stop = true;
    lock.unlock();
    _cond.notify_all();
    // Wait for ~Latch.
  }

  /// Pushes \p f on the work stack and wakes one worker.
  void add(std::function<void()> f) override {
    std::unique_lock<std::mutex> lock(_mutex);
    _workStack.push(f);
    lock.unlock();
    _cond.notify_one();
  }

private:
  /// Worker loop: pop and run tasks until _stop is set, then report the exit
  /// to _done.
  void work() {
    while (true) {
      std::unique_lock<std::mutex> lock(_mutex);
      _cond.wait(lock, [&] {
        return _stop || !_workStack.empty();
      });
      if (_stop)
        break;
      auto task = _workStack.top();
      _workStack.pop();
      // Run the task without holding the queue lock.
      lock.unlock();
      task();
    }
    _done.dec();
  }

  std::atomic<bool> _stop;
  std::stack<std::function<void()>> _workStack;
  std::mutex _mutex;
  std::condition_variable _cond;
  // Counts workers that have not yet left work().
  Latch _done;
};

/// Returns the process-wide executor (a function-local static).
inline Executor *getDefaultExecutor() {
  static ThreadPoolExecutor exec;
  return &exec;
}
#endif

} // namespace internal

/// \brief Allows launching a number of tasks and waiting for them to finish
/// either explicitly via sync() or implicitly on destruction.
+class TaskGroup { + Latch _latch; + +public: + void spawn(std::function<void()> f) { + _latch.inc(); + internal::getDefaultExecutor()->add([&, f] { + f(); + _latch.dec(); + }); + } + + void sync() const { _latch.sync(); } +}; + +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 +template <class RandomAccessIterator, class Comp> +void parallel_sort( + RandomAccessIterator start, RandomAccessIterator end, + const Comp &comp = std::less< + typename std::iterator_traits<RandomAccessIterator>::value_type>()) { + std::sort(start, end, comp); +} +#elif defined(_MSC_VER) +// Use ppl parallel_sort on Windows. +template <class RandomAccessIterator, class Comp> +void parallel_sort( + RandomAccessIterator start, RandomAccessIterator end, + const Comp &comp = std::less< + typename std::iterator_traits<RandomAccessIterator>::value_type>()) { + concurrency::parallel_sort(start, end, comp); +} +#else +namespace detail { +const ptrdiff_t minParallelSize = 1024; + +/// \brief Inclusive median. +template <class RandomAccessIterator, class Comp> +RandomAccessIterator medianOf3(RandomAccessIterator start, + RandomAccessIterator end, const Comp &comp) { + RandomAccessIterator mid = start + (std::distance(start, end) / 2); + return comp(*start, *(end - 1)) + ? (comp(*mid, *(end - 1)) ? (comp(*start, *mid) ? mid : start) + : end - 1) + : (comp(*mid, *start) ? (comp(*(end - 1), *mid) ? mid : end - 1) + : start); +} + +template <class RandomAccessIterator, class Comp> +void parallel_quick_sort(RandomAccessIterator start, RandomAccessIterator end, + const Comp &comp, TaskGroup &tg, size_t depth) { + // Do a sequential sort for small inputs. + if (std::distance(start, end) < detail::minParallelSize || depth == 0) { + std::sort(start, end, comp); + return; + } + + // Partition. + auto pivot = medianOf3(start, end, comp); + // Move pivot to end. 
+ std::swap(*(end - 1), *pivot); + pivot = std::partition(start, end - 1, [&comp, end](decltype(*start) v) { + return comp(v, *(end - 1)); + }); + // Move pivot to middle of partition. + std::swap(*pivot, *(end - 1)); + + // Recurse. + tg.spawn([=, &comp, &tg] { + parallel_quick_sort(start, pivot, comp, tg, depth - 1); + }); + parallel_quick_sort(pivot + 1, end, comp, tg, depth - 1); +} +} + +template <class RandomAccessIterator, class Comp> +void parallel_sort( + RandomAccessIterator start, RandomAccessIterator end, + const Comp &comp = std::less< + typename std::iterator_traits<RandomAccessIterator>::value_type>()) { + TaskGroup tg; + detail::parallel_quick_sort(start, end, comp, tg, + llvm::Log2_64(std::distance(start, end)) + 1); +} +#endif + +template <class T> void parallel_sort(T *start, T *end) { + parallel_sort(start, end, std::less<T>()); +} + +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 +template <class IterTy, class FuncTy> +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + std::for_each(Begin, End, Fn); +} + +template <class IndexTy, class FuncTy> +void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) { + for (IndexTy I = Begin; I != End; ++I) + Fn(I); +} +#elif defined(_MSC_VER) +// Use ppl parallel_for_each on Windows. +template <class IterTy, class FuncTy> +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + concurrency::parallel_for_each(Begin, End, Fn); +} + +template <class IndexTy, class FuncTy> +void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) { + concurrency::parallel_for(Begin, End, Fn); +} +#else +template <class IterTy, class FuncTy> +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + // TaskGroup has a relatively high overhead, so we want to reduce + // the number of spawn() calls. We'll create up to 1024 tasks here. + // (Note that 1024 is an arbitrary number. This code probably needs + // improving to take the number of available cores into account.) 
+ ptrdiff_t TaskSize = std::distance(Begin, End) / 1024; + if (TaskSize == 0) + TaskSize = 1; + + TaskGroup Tg; + while (TaskSize <= std::distance(Begin, End)) { + Tg.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); }); + Begin += TaskSize; + } + Tg.spawn([=, &Fn] { std::for_each(Begin, End, Fn); }); +} + +template <class IndexTy, class FuncTy> +void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) { + ptrdiff_t TaskSize = (End - Begin) / 1024; + if (TaskSize == 0) + TaskSize = 1; + + TaskGroup Tg; + IndexTy I = Begin; + for (; I < End; I += TaskSize) { + Tg.spawn([=, &Fn] { + for (IndexTy J = I, E = I + TaskSize; J != E; ++J) + Fn(J); + }); + Begin += TaskSize; + } + Tg.spawn([=, &Fn] { + for (IndexTy J = I; J < End; ++J) + Fn(J); + }); +} +#endif +} // end namespace lld + +#endif // LLD_CORE_PARALLEL_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Pass.h b/contrib/llvm/tools/lld/include/lld/Core/Pass.h new file mode 100644 index 000000000000..bfe3f9b10e0c --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Pass.h @@ -0,0 +1,43 @@ +//===------ Core/Pass.h - Base class for linker passes ----------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_PASS_H +#define LLD_CORE_PASS_H + +#include "llvm/Support/Error.h" + +namespace lld { + +class SimpleFile; + +/// Once the core linking is done (which resolves references, coalesces atoms +/// and produces a complete Atom graph), the linker runs a series of passes +/// on the Atom graph. The graph is modeled as a File, which means the pass +/// has access to all the atoms and to File level attributes. Each pass does +/// a particular transformation to the Atom graph or to the File attributes. +/// +/// This is the abstract base class for all passes. 
A Pass does its +/// actual work in its perform() method. It can iterate over Atoms in the +/// graph using the *begin()/*end() atom iterator of the File. It can add +/// new Atoms to the graph using the File's addAtom() method. +class Pass { +public: + virtual ~Pass() = default; + + /// Do the actual work of the Pass. + virtual llvm::Error perform(SimpleFile &mergedFile) = 0; + +protected: + // Only subclasses can be instantiated. + Pass() = default; +}; + +} // end namespace lld + +#endif // LLD_CORE_PASS_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/PassManager.h b/contrib/llvm/tools/lld/include/lld/Core/PassManager.h new file mode 100644 index 000000000000..09b417a2985d --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/PassManager.h @@ -0,0 +1,48 @@ +//===- lld/Core/PassManager.h - Manage linker passes ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_PASS_MANAGER_H +#define LLD_CORE_PASS_MANAGER_H + +#include "lld/Core/LLVM.h" +#include "lld/Core/Pass.h" +#include "llvm/Support/Error.h" +#include <memory> +#include <vector> + +namespace lld { +class SimpleFile; +class Pass; + +/// \brief Owns and runs a collection of passes. +/// +/// This class is currently just a container for passes and a way to run them. +/// +/// In the future this should handle timing pass runs, running parallel passes, +/// and validate/satisfy pass dependencies. +class PassManager { +public: + void add(std::unique_ptr<Pass> pass) { + _passes.push_back(std::move(pass)); + } + + llvm::Error runOnFile(SimpleFile &file) { + for (std::unique_ptr<Pass> &pass : _passes) + if (llvm::Error EC = pass->perform(file)) + return EC; + return llvm::Error::success(); + } + +private: + /// \brief Passes in the order they should run. 
+ std::vector<std::unique_ptr<Pass>> _passes; +}; +} // end namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/include/lld/Core/Reader.h b/contrib/llvm/tools/lld/include/lld/Core/Reader.h new file mode 100644 index 000000000000..5105eb1aa2be --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Reader.h @@ -0,0 +1,156 @@ +//===- lld/Core/Reader.h - Abstract File Format Reading Interface ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_READER_H +#define LLD_CORE_READER_H + +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include <memory> +#include <vector> + +using llvm::sys::fs::file_magic; + +namespace llvm { +namespace yaml { +class IO; +} // end namespace yaml +} // end namespace llvm + +namespace lld { + +class File; +class LinkingContext; +class MachOLinkingContext; + +/// \brief An abstract class for reading object files, library files, and +/// executable files. +/// +/// Each file format (e.g. mach-o, etc) has a concrete subclass of Reader. +class Reader { +public: + virtual ~Reader() = default; + + /// Sniffs the file to determine if this Reader can parse it. + /// The method is called with: + /// 1) the file_magic enumeration returned by identify_magic() + /// 2) the whole file content buffer if the above is not enough. + virtual bool canParse(file_magic magic, MemoryBufferRef mb) const = 0; + + /// \brief Parse a supplied buffer (already filled with the contents of a + /// file) and create a File object. + /// The resulting File object takes ownership of the MemoryBuffer. 
+ virtual ErrorOr<std::unique_ptr<File>> + loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &) const = 0; +}; + +/// \brief An abstract class for handling alternate yaml representations +/// of object files. +/// +/// The YAML syntax allows "tags" which are used to specify the type of +/// the YAML node. In lld, top level YAML documents can be in many YAML +/// representations (e.g. mach-o encoded as yaml, etc). A tag is used to +/// specify which representation is used in the following YAML document. +/// To work, there must be a YamlIOTaggedDocumentHandler registered that +/// handles each tag type. +class YamlIOTaggedDocumentHandler { +public: + virtual ~YamlIOTaggedDocumentHandler(); + + /// This method is called on each registered YamlIOTaggedDocumentHandler + /// until one returns true. If the subclass handles tag type !xyz, then + /// this method should call io.mapTag("!xyz") to see if that is the current + /// document type, and if so, process the rest of the document using + /// YAML I/O, then convert the result into an lld::File* and return it. + virtual bool handledDocTag(llvm::yaml::IO &io, const lld::File *&f) const = 0; +}; + +/// A registry to hold the list of currently registered Readers and +/// tables which map Reference kind values to strings. +/// The linker does not directly invoke Readers. Instead, it registers +/// Readers based on its configuration and command line options, then calls +/// the Registry object to parse files. +class Registry { +public: + Registry(); + + /// Walk the list of registered Readers and find one that can parse the + /// supplied file and parse it. + ErrorOr<std::unique_ptr<File>> + loadFile(std::unique_ptr<MemoryBuffer> mb) const; + + /// Walk the list of registered kind tables to convert a Reference Kind + /// name to a value. 
+ bool referenceKindFromString(StringRef inputStr, Reference::KindNamespace &ns, + Reference::KindArch &a, + Reference::KindValue &value) const; + + /// Walk the list of registered kind tables to convert a Reference Kind + /// value to a string. + bool referenceKindToString(Reference::KindNamespace ns, Reference::KindArch a, + Reference::KindValue value, StringRef &) const; + + /// Walk the list of registered tag handlers and have the one that handles + /// the current document type process the yaml into an lld::File*. + bool handleTaggedDoc(llvm::yaml::IO &io, const lld::File *&file) const; + + // These methods are called to dynamically add support for various file + // formats. The methods are also implemented in the appropriate lib*.a + // library, so that the code for handling a format is only linked in, if this + // method is used. Any options that a Reader might need must be passed + // as parameters to the addSupport*() method. + void addSupportArchives(bool logLoading); + void addSupportYamlFiles(); + void addSupportMachOObjects(MachOLinkingContext &); + + /// To convert between kind values and names, the registry walks the list + /// of registered kind tables. Each table is a zero terminated array of + /// KindStrings elements. + struct KindStrings { + Reference::KindValue value; + StringRef name; + }; + + /// A Reference Kind value is a tuple of <namespace, arch, value>. All + /// entries in a conversion table have the same <namespace, arch>. The + /// array then contains the value/name pairs. 
+ void addKindTable(Reference::KindNamespace ns, Reference::KindArch arch, + const KindStrings array[]); + +private: + struct KindEntry { + Reference::KindNamespace ns; + Reference::KindArch arch; + const KindStrings *array; + }; + + void add(std::unique_ptr<Reader>); + void add(std::unique_ptr<YamlIOTaggedDocumentHandler>); + + std::vector<std::unique_ptr<Reader>> _readers; + std::vector<std::unique_ptr<YamlIOTaggedDocumentHandler>> _yamlHandlers; + std::vector<KindEntry> _kindEntries; +}; + +// Utilities for building a KindString table. For instance: +// static const Registry::KindStrings table[] = { +// LLD_KIND_STRING_ENTRY(R_VAX_ADDR16), +// LLD_KIND_STRING_ENTRY(R_VAX_DATA16), +// LLD_KIND_STRING_END +// }; +#define LLD_KIND_STRING_ENTRY(name) { name, #name } +#define LLD_KIND_STRING_END { 0, "" } + +} // end namespace lld + +#endif // LLD_CORE_READER_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Reference.h b/contrib/llvm/tools/lld/include/lld/Core/Reference.h new file mode 100644 index 000000000000..1d3003c84616 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Reference.h @@ -0,0 +1,119 @@ +//===- Core/References.h - A Reference to Another Atom ----------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_REFERENCES_H +#define LLD_CORE_REFERENCES_H + +#include <cstdint> + +namespace lld { + +class Atom; + +/// +/// The linker has a Graph Theory model of linking. An object file is seen +/// as a set of Atoms with References to other Atoms. Each Atom is a node +/// and each Reference is an edge. +/// +/// For example if a function contains a call site to "malloc" 40 bytes into +/// the Atom, then the function Atom will have a Reference of: offsetInAtom=40, +/// kind=callsite, target=malloc, addend=0. 
+/// +/// Besides supporting traditional "relocations", references are also used for +/// forcing layout (one atom must follow another), marking data-in-code +/// (jump tables or ARM constants), etc. +/// +/// The "kind" of a reference is a tuple of <namespace, arch, value>. This +/// enables us to re-use existing relocation types defined for various +/// file formats and architectures. +/// +/// References and atoms form a directed graph. The dead-stripping pass +/// traverses them starting from dead-strip root atoms to garbage collect +/// unreachable ones. +/// +/// References of any kind are considered as directed edges. In addition to +/// that, references of some kinds are considered as bidirected edges. +class Reference { +public: + /// Which universe defines the kindValue(). + enum class KindNamespace { + all = 0, + testing = 1, + mach_o = 2, + }; + + KindNamespace kindNamespace() const { return (KindNamespace)_kindNamespace; } + void setKindNamespace(KindNamespace ns) { _kindNamespace = (uint8_t)ns; } + + // Which architecture the kind value is for. + enum class KindArch { all, AArch64, ARM, x86, x86_64}; + + KindArch kindArch() const { return (KindArch)_kindArch; } + void setKindArch(KindArch a) { _kindArch = (uint8_t)a; } + + typedef uint16_t KindValue; + + KindValue kindValue() const { return _kindValue; } + + /// setKindValue() is needed because during linking, some optimizations may + /// change the codegen and hence the reference kind. + void setKindValue(KindValue value) { + _kindValue = value; + } + + /// KindValues used with KindNamespace::all and KindArch::all. + enum { + // kindLayoutAfter is treated as a bidirected edge by the dead-stripping + // pass. + kindLayoutAfter = 1, + kindAssociate, + }; + + // A value to be added to the value of a target + typedef int64_t Addend; + + /// If the reference is a fixup in the Atom, then this returns the + /// byte offset into the Atom's content to do the fix up. 
+ virtual uint64_t offsetInAtom() const = 0; + + /// Returns the atom this reference refers to. + virtual const Atom *target() const = 0; + + /// During linking, the linker may merge graphs which coalesces some nodes + /// (i.e. Atoms). To switch the target of a reference, this method is called. + virtual void setTarget(const Atom *) = 0; + + /// Some relocations require a symbol and a value (e.g. foo + 4). + virtual Addend addend() const = 0; + + /// During linking, some optimizations may change addend value. + virtual void setAddend(Addend) = 0; + + /// Returns target specific attributes of the reference. + virtual uint32_t tag() const { return 0; } + +protected: + /// Reference is an abstract base class. Only subclasses can use constructor. + Reference(KindNamespace ns, KindArch a, KindValue value) + : _kindValue(value), _kindNamespace((uint8_t)ns), _kindArch((uint8_t)a) {} + + /// The memory for Reference objects is always managed by the owning File + /// object. Therefore, no one but the owning File object should call + /// delete on a Reference. In fact, some File objects may bulk allocate + /// an array of References, so they cannot be individually deleted by anyone. + virtual ~Reference() = default; + + KindValue _kindValue; + uint8_t _kindNamespace; + uint8_t _kindArch; +}; + +} // end namespace lld + +#endif // LLD_CORE_REFERENCES_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Reproduce.h b/contrib/llvm/tools/lld/include/lld/Core/Reproduce.h new file mode 100644 index 000000000000..6e1d36a54916 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Reproduce.h @@ -0,0 +1,39 @@ +//===- Reproduce.h - Utilities for creating reproducers ---------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_REPRODUCE_H +#define LLD_CORE_REPRODUCE_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace opt { class Arg; } +} + +namespace lld { + +// Makes a given pathname an absolute path first, and then remove +// beginning /. For example, "../foo.o" is converted to "home/john/foo.o", +// assuming that the current directory is "/home/john/bar". +std::string relativeToRoot(StringRef Path); + +// Quote a given string if it contains a space character. +std::string quote(StringRef S); + +// Rewrite the given path if a file exists with that pathname, otherwise +// returns the original path. +std::string rewritePath(StringRef S); + +// Returns the string form of the given argument. +std::string toString(llvm::opt::Arg *Arg); +} + +#endif diff --git a/contrib/llvm/tools/lld/include/lld/Core/Resolver.h b/contrib/llvm/tools/lld/include/lld/Core/Resolver.h new file mode 100644 index 000000000000..fb62a779c0a5 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Resolver.h @@ -0,0 +1,106 @@ +//===- Core/Resolver.h - Resolves Atom References -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_RESOLVER_H +#define LLD_CORE_RESOLVER_H + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/Simple.h" +#include "lld/Core/SymbolTable.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/ErrorOr.h" +#include <set> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +namespace lld { + +class Atom; +class LinkingContext; + +/// \brief The Resolver is responsible for merging all input object files +/// and producing a merged graph. +class Resolver { +public: + Resolver(LinkingContext &ctx) : _ctx(ctx), _result(new MergedFile()) {} + + // InputFiles::Handler methods + void doDefinedAtom(OwningAtomPtr<DefinedAtom> atom); + bool doUndefinedAtom(OwningAtomPtr<UndefinedAtom> atom); + void doSharedLibraryAtom(OwningAtomPtr<SharedLibraryAtom> atom); + void doAbsoluteAtom(OwningAtomPtr<AbsoluteAtom> atom); + + // Handle files, this adds atoms from the current file that is + // being processed by the resolver + llvm::Expected<bool> handleFile(File &); + + // Handle an archive library file. + llvm::Expected<bool> handleArchiveFile(File &); + + // Handle a shared library file. 
+ llvm::Error handleSharedLibrary(File &); + + /// @brief do work of merging and resolving and return list + bool resolve(); + + std::unique_ptr<SimpleFile> resultFile() { return std::move(_result); } + +private: + typedef std::function<llvm::Expected<bool>(StringRef)> UndefCallback; + + bool undefinesAdded(int begin, int end); + File *getFile(int &index); + + /// \brief The main function that iterates over the files to resolve + bool resolveUndefines(); + void updateReferences(); + void deadStripOptimize(); + bool checkUndefines(); + void removeCoalescedAwayAtoms(); + llvm::Expected<bool> forEachUndefines(File &file, UndefCallback callback); + + void markLive(const Atom *atom); + + class MergedFile : public SimpleFile { + public: + MergedFile() : SimpleFile("<linker-internal>", kindResolverMergedObject) {} + void addAtoms(llvm::MutableArrayRef<OwningAtomPtr<Atom>> atoms); + }; + + LinkingContext &_ctx; + SymbolTable _symbolTable; + std::vector<OwningAtomPtr<Atom>> _atoms; + std::set<const Atom *> _deadStripRoots; + llvm::DenseSet<const Atom *> _liveAtoms; + llvm::DenseSet<const Atom *> _deadAtoms; + std::unique_ptr<MergedFile> _result; + std::unordered_multimap<const Atom *, const Atom *> _reverseRef; + + // --start-group and --end-group + std::vector<File *> _files; + std::map<File *, bool> _newUndefinesAdded; + + // List of undefined symbols. + std::vector<StringRef> _undefines; + + // Start position in _undefines for each archive/shared library file. + // Symbols from index 0 to the start position are already searched before. + // Searching them again would never succeed. When we look for undefined + // symbols from an archive/shared library file, start from its start + // position to save time. 
+ std::map<File *, size_t> _undefineIndex; +}; + +} // namespace lld + +#endif // LLD_CORE_RESOLVER_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryAtom.h b/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryAtom.h new file mode 100644 index 000000000000..7fec7a3e3d29 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryAtom.h @@ -0,0 +1,53 @@ +//===- Core/SharedLibraryAtom.h - A Shared Library Atom -------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SHARED_LIBRARY_ATOM_H +#define LLD_CORE_SHARED_LIBRARY_ATOM_H + +#include "lld/Core/Atom.h" + +namespace lld { + +/// A SharedLibraryAtom has no content. +/// It exists to represent a symbol which will be bound at runtime. +class SharedLibraryAtom : public Atom { +public: + enum class Type : uint32_t { + Unknown, + Code, + Data, + }; + + /// Returns shared library name used to load it at runtime. + /// On Darwin it is the LC_LOAD_DYLIB dylib name. + virtual StringRef loadName() const = 0; + + /// Returns if shared library symbol can be missing at runtime and if + /// so the loader should silently resolve address of symbol to be nullptr. 
+ virtual bool canBeNullAtRuntime() const = 0; + + virtual Type type() const = 0; + + virtual uint64_t size() const = 0; + + static bool classof(const Atom *a) { + return a->definition() == definitionSharedLibrary; + } + + static inline bool classof(const SharedLibraryAtom *) { return true; } + +protected: + SharedLibraryAtom() : Atom(definitionSharedLibrary) {} + + ~SharedLibraryAtom() override = default; +}; + +} // namespace lld + +#endif // LLD_CORE_SHARED_LIBRARY_ATOM_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryFile.h b/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryFile.h new file mode 100644 index 000000000000..53bf967b0236 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryFile.h @@ -0,0 +1,70 @@ +//===- Core/SharedLibraryFile.h - Models shared libraries as Atoms --------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SHARED_LIBRARY_FILE_H +#define LLD_CORE_SHARED_LIBRARY_FILE_H + +#include "lld/Core/File.h" + +namespace lld { + +/// +/// The SharedLibraryFile subclass of File is used to represent dynamic +/// shared libraries being linked against. +/// +class SharedLibraryFile : public File { +public: + static bool classof(const File *f) { + return f->kind() == kindSharedLibrary; + } + + /// Check if the shared library exports a symbol with the specified name. + /// If so, return a SharedLibraryAtom which represents that exported + /// symbol. Otherwise return nullptr. + virtual OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const = 0; + + // Returns the install name. 
+ virtual StringRef getDSOName() const = 0; + + const AtomRange<DefinedAtom> defined() const override { + return _definedAtoms; + } + + const AtomRange<UndefinedAtom> undefined() const override { + return _undefinedAtoms; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _sharedLibraryAtoms; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _absoluteAtoms; + } + + void clearAtoms() override { + _definedAtoms.clear(); + _undefinedAtoms.clear(); + _sharedLibraryAtoms.clear(); + _absoluteAtoms.clear(); + } + +protected: + /// only subclasses of SharedLibraryFile can be instantiated + explicit SharedLibraryFile(StringRef path) : File(path, kindSharedLibrary) {} + + AtomVector<DefinedAtom> _definedAtoms; + AtomVector<UndefinedAtom> _undefinedAtoms; + AtomVector<SharedLibraryAtom> _sharedLibraryAtoms; + AtomVector<AbsoluteAtom> _absoluteAtoms; +}; + +} // namespace lld + +#endif // LLD_CORE_SHARED_LIBRARY_FILE_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Simple.h b/contrib/llvm/tools/lld/include/lld/Core/Simple.h new file mode 100644 index 000000000000..3aa7abf5d12b --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Simple.h @@ -0,0 +1,271 @@ +//===- lld/Core/Simple.h - Simple implementations of Atom and File --------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Provide simple implementations for Atoms and File. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SIMPLE_H +#define LLD_CORE_SIMPLE_H + +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> + +namespace lld { + +class SimpleFile : public File { +public: + SimpleFile(StringRef path, File::Kind kind) + : File(path, kind) {} + + ~SimpleFile() override { + _defined.clear(); + _undefined.clear(); + _shared.clear(); + _absolute.clear(); + } + + void addAtom(DefinedAtom &a) { + _defined.push_back(OwningAtomPtr<DefinedAtom>(&a)); + } + void addAtom(UndefinedAtom &a) { + _undefined.push_back(OwningAtomPtr<UndefinedAtom>(&a)); + } + void addAtom(SharedLibraryAtom &a) { + _shared.push_back(OwningAtomPtr<SharedLibraryAtom>(&a)); + } + void addAtom(AbsoluteAtom &a) { + _absolute.push_back(OwningAtomPtr<AbsoluteAtom>(&a)); + } + + void addAtom(const Atom &atom) { + if (auto *p = dyn_cast<DefinedAtom>(&atom)) { + addAtom(const_cast<DefinedAtom &>(*p)); + } else if (auto *p = dyn_cast<UndefinedAtom>(&atom)) { + addAtom(const_cast<UndefinedAtom &>(*p)); + } else if (auto *p = dyn_cast<SharedLibraryAtom>(&atom)) { + addAtom(const_cast<SharedLibraryAtom &>(*p)); + } else if (auto *p = dyn_cast<AbsoluteAtom>(&atom)) { + addAtom(const_cast<AbsoluteAtom &>(*p)); + } else { + llvm_unreachable("atom has unknown definition kind"); + } + } + + void removeDefinedAtomsIf(std::function<bool(const DefinedAtom *)> pred) { + auto &atoms = _defined; + auto newEnd = 
std::remove_if(atoms.begin(), atoms.end(), + [&pred](OwningAtomPtr<DefinedAtom> &p) { + return pred(p.get()); + }); + atoms.erase(newEnd, atoms.end()); + } + + const AtomRange<DefinedAtom> defined() const override { return _defined; } + + const AtomRange<UndefinedAtom> undefined() const override { + return _undefined; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _shared; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _absolute; + } + + void clearAtoms() override { + _defined.clear(); + _undefined.clear(); + _shared.clear(); + _absolute.clear(); + } + +private: + AtomVector<DefinedAtom> _defined; + AtomVector<UndefinedAtom> _undefined; + AtomVector<SharedLibraryAtom> _shared; + AtomVector<AbsoluteAtom> _absolute; +}; + +class SimpleReference : public Reference, + public llvm::ilist_node<SimpleReference> { +public: + SimpleReference(Reference::KindNamespace ns, Reference::KindArch arch, + Reference::KindValue value, uint64_t off, const Atom *t, + Reference::Addend a) + : Reference(ns, arch, value), _target(t), _offsetInAtom(off), _addend(a) { + } + SimpleReference() + : Reference(Reference::KindNamespace::all, Reference::KindArch::all, 0), + _target(nullptr), _offsetInAtom(0), _addend(0) {} + + uint64_t offsetInAtom() const override { return _offsetInAtom; } + + const Atom *target() const override { + assert(_target); + return _target; + } + + Addend addend() const override { return _addend; } + void setAddend(Addend a) override { _addend = a; } + void setTarget(const Atom *newAtom) override { _target = newAtom; } + +private: + const Atom *_target; + uint64_t _offsetInAtom; + Addend _addend; +}; + +class SimpleDefinedAtom : public DefinedAtom { +public: + explicit SimpleDefinedAtom(const File &f) + : _file(f), _ordinal(f.getNextAtomOrdinalAndIncrement()) {} + + ~SimpleDefinedAtom() override { + _references.clearAndLeakNodesUnsafely(); + } + + const File &file() const override { return _file; } + + 
StringRef name() const override { return StringRef(); } + + uint64_t ordinal() const override { return _ordinal; } + + Scope scope() const override { return DefinedAtom::scopeLinkageUnit; } + + Interposable interposable() const override { + return DefinedAtom::interposeNo; + } + + Merge merge() const override { return DefinedAtom::mergeNo; } + + Alignment alignment() const override { return 1; } + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionBasedOnContent; + } + + StringRef customSectionName() const override { return StringRef(); } + DeadStripKind deadStrip() const override { + return DefinedAtom::deadStripNormal; + } + + DefinedAtom::reference_iterator begin() const override { + const void *it = + reinterpret_cast<const void *>(_references.begin().getNodePtr()); + return reference_iterator(*this, it); + } + + DefinedAtom::reference_iterator end() const override { + const void *it = + reinterpret_cast<const void *>(_references.end().getNodePtr()); + return reference_iterator(*this, it); + } + + const Reference *derefIterator(const void *it) const override { + return &*RefList::const_iterator( + *reinterpret_cast<const llvm::ilist_node<SimpleReference> *>(it)); + } + + void incrementIterator(const void *&it) const override { + RefList::const_iterator ref( + *reinterpret_cast<const llvm::ilist_node<SimpleReference> *>(it)); + it = reinterpret_cast<const void *>(std::next(ref).getNodePtr()); + } + + void addReference(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue kindValue, uint64_t off, + const Atom *target, Reference::Addend a) override { + assert(target && "trying to create reference to nothing"); + auto node = new (_file.allocator()) + SimpleReference(ns, arch, kindValue, off, target, a); + _references.push_back(node); + } + + /// Sort references in a canonical order (by offset, then by kind). 
+ void sortReferences() const { + // Cannot sort a linked list, so move elements into a temporary vector, + // sort the vector, then reconstruct the list. + llvm::SmallVector<SimpleReference *, 16> elements; + for (SimpleReference &node : _references) { + elements.push_back(&node); + } + std::sort(elements.begin(), elements.end(), + [] (const SimpleReference *lhs, const SimpleReference *rhs) -> bool { + uint64_t lhsOffset = lhs->offsetInAtom(); + uint64_t rhsOffset = rhs->offsetInAtom(); + if (rhsOffset != lhsOffset) + return (lhsOffset < rhsOffset); + if (rhs->kindNamespace() != lhs->kindNamespace()) + return (lhs->kindNamespace() < rhs->kindNamespace()); + if (rhs->kindArch() != lhs->kindArch()) + return (lhs->kindArch() < rhs->kindArch()); + return (lhs->kindValue() < rhs->kindValue()); + }); + _references.clearAndLeakNodesUnsafely(); + for (SimpleReference *node : elements) { + _references.push_back(node); + } + } + + void setOrdinal(uint64_t ord) { _ordinal = ord; } + +private: + typedef llvm::ilist<SimpleReference> RefList; + + const File &_file; + uint64_t _ordinal; + mutable RefList _references; +}; + +class SimpleUndefinedAtom : public UndefinedAtom { +public: + SimpleUndefinedAtom(const File &f, StringRef name) : _file(f), _name(name) { + assert(!name.empty() && "UndefinedAtoms must have a name"); + } + + ~SimpleUndefinedAtom() override = default; + + /// file - returns the File that produced/owns this Atom + const File &file() const override { return _file; } + + /// name - The name of the atom. For a function atom, it is the (mangled) + /// name of the function. 
+ StringRef name() const override { return _name; } + + CanBeNull canBeNull() const override { return UndefinedAtom::canBeNullNever; } + +private: + const File &_file; + StringRef _name; +}; + +} // end namespace lld + +#endif // LLD_CORE_SIMPLE_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h b/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h new file mode 100644 index 000000000000..ba4951e5bd13 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h @@ -0,0 +1,96 @@ +//===- Core/SymbolTable.h - Main Symbol Table -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SYMBOL_TABLE_H +#define LLD_CORE_SYMBOL_TABLE_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include <cstring> +#include <map> +#include <vector> + +namespace lld { + +class AbsoluteAtom; +class Atom; +class DefinedAtom; +class LinkingContext; +class ResolverOptions; +class SharedLibraryAtom; +class UndefinedAtom; + +/// \brief The SymbolTable class is responsible for coalescing atoms. +/// +/// All atoms coalescable by-name or by-content should be added. +/// The method replacement() can be used to find the replacement atom +/// if an atom has been coalesced away. 
+class SymbolTable { +public: + /// @brief add atom to symbol table + bool add(const DefinedAtom &); + + /// @brief add atom to symbol table + bool add(const UndefinedAtom &); + + /// @brief add atom to symbol table + bool add(const SharedLibraryAtom &); + + /// @brief add atom to symbol table + bool add(const AbsoluteAtom &); + + /// @brief returns atom in symbol table for specified name (or nullptr) + const Atom *findByName(StringRef sym); + + /// @brief returns vector of remaining UndefinedAtoms + std::vector<const UndefinedAtom *> undefines(); + + /// @brief if atom has been coalesced away, return replacement, else return atom + const Atom *replacement(const Atom *); + + /// @brief if atom has been coalesced away, return true + bool isCoalescedAway(const Atom *); + +private: + typedef llvm::DenseMap<const Atom *, const Atom *> AtomToAtom; + + struct StringRefMappingInfo { + static StringRef getEmptyKey() { return StringRef(); } + static StringRef getTombstoneKey() { return StringRef(" ", 1); } + static unsigned getHashValue(StringRef const val) { + return llvm::HashString(val); + } + static bool isEqual(StringRef const lhs, StringRef const rhs) { + return lhs.equals(rhs); + } + }; + typedef llvm::DenseMap<StringRef, const Atom *, + StringRefMappingInfo> NameToAtom; + + struct AtomMappingInfo { + static const DefinedAtom * getEmptyKey() { return nullptr; } + static const DefinedAtom * getTombstoneKey() { return (DefinedAtom*)(-1); } + static unsigned getHashValue(const DefinedAtom * const Val); + static bool isEqual(const DefinedAtom * const LHS, + const DefinedAtom * const RHS); + }; + typedef llvm::DenseSet<const DefinedAtom*, AtomMappingInfo> AtomContentSet; + + bool addByName(const Atom &); + bool addByContent(const DefinedAtom &); + + AtomToAtom _replacedAtoms; + NameToAtom _nameTable; + AtomContentSet _contentTable; +}; + +} // namespace lld + +#endif // LLD_CORE_SYMBOL_TABLE_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/TODO.txt 
b/contrib/llvm/tools/lld/include/lld/Core/TODO.txt new file mode 100644 index 000000000000..8b523045de75 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/TODO.txt @@ -0,0 +1,17 @@ +include/lld/Core +~~~~~~~~~~~~~~~~ + +* The yaml reader/writer interfaces should be changed to return + an explanatory string if there is an error. The existing error_code + abstraction only works for returning low level OS errors. It does not + work for describing formatting issues. + +* We need to design a diagnostics interface. It would be nice to share code + with Clang_ where possible. + +* We need to add more attributes to File. In particular, we need cpu + and OS information (like target triples). We should also provide explicit + support for `LLVM IR module flags metadata`__. + +.. __: http://llvm.org/docs/LangRef.html#module_flags +.. _Clang: http://clang.llvm.org/docs/InternalsManual.html#Diagnostics diff --git a/contrib/llvm/tools/lld/include/lld/Core/UndefinedAtom.h b/contrib/llvm/tools/lld/include/lld/Core/UndefinedAtom.h new file mode 100644 index 000000000000..f45d6ecda6b0 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/UndefinedAtom.h @@ -0,0 +1,68 @@ +//===- Core/UndefinedAtom.h - An Undefined Atom ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_UNDEFINED_ATOM_H +#define LLD_CORE_UNDEFINED_ATOM_H + +#include "lld/Core/Atom.h" + +namespace lld { + +/// An UndefinedAtom has no content. +/// It exists as a placeholder for a future atom. +class UndefinedAtom : public Atom { +public: + /// Whether this undefined symbol needs to be resolved, + /// or whether it can just evaluate to nullptr. + /// This concept is often called "weak", but that term + /// is overloaded to mean other things too. 
+ enum CanBeNull { + /// Normal symbols must be resolved at build time + canBeNullNever, + + /// This symbol can be missing at runtime and will evalute to nullptr. + /// That is, the static linker still must find a definition (usually + /// is some shared library), but at runtime, the dynamic loader + /// will allow the symbol to be missing and resolved to nullptr. + /// + /// On Darwin this is generated using a function prototype with + /// __attribute__((weak_import)). + /// On linux this is generated using a function prototype with + /// __attribute__((weak)). + /// On Windows this feature is not supported. + canBeNullAtRuntime, + + /// This symbol can be missing at build time. + /// That is, the static linker will not error if a definition for + /// this symbol is not found at build time. Instead, the linker + /// will build an executable that lets the dynamic loader find the + /// symbol at runtime. + /// This feature is not supported on Darwin nor Windows. + /// On linux this is generated using a function prototype with + /// __attribute__((weak)). + canBeNullAtBuildtime + }; + + virtual CanBeNull canBeNull() const = 0; + + static bool classof(const Atom *a) { + return a->definition() == definitionUndefined; + } + + static bool classof(const UndefinedAtom *) { return true; } + +protected: + UndefinedAtom() : Atom(definitionUndefined) {} + + ~UndefinedAtom() override = default; +}; + +} // namespace lld + +#endif // LLD_CORE_UNDEFINED_ATOM_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Writer.h b/contrib/llvm/tools/lld/include/lld/Core/Writer.h new file mode 100644 index 000000000000..216f934916bc --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Writer.h @@ -0,0 +1,47 @@ +//===- lld/Core/Writer.h - Abstract File Format Interface -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//

#ifndef LLD_CORE_WRITER_H
#define LLD_CORE_WRITER_H

#include "lld/Core/LLVM.h"
#include "llvm/Support/Error.h"
#include <memory>
#include <vector>

namespace lld {
class File;
class LinkingContext;
class MachOLinkingContext;

/// \brief The Writer is an abstract class for writing object files, shared
/// library files, and executable files.  Each file format (e.g. mach-o, etc)
/// has a concrete subclass of Writer.
class Writer {
public:
  virtual ~Writer();

  /// \brief Write a file from the supplied File object.
  ///
  /// \param linkedFile the File to write out.
  /// \param path       where to write it.
  /// \returns an llvm::Error describing the outcome.
  virtual llvm::Error writeFile(const File &linkedFile, StringRef path) = 0;

  /// \brief This method is called by Core Linking to give the Writer a chance
  /// to add file format specific "files" to the set of files to be linked.
  /// This is how file format specific atoms can be added to the link.
  ///
  /// The default implementation adds nothing.
  virtual void createImplicitFiles(std::vector<std::unique_ptr<File>> &) {}

protected:
  // only concrete subclasses can be instantiated
  Writer();
};

// Factory functions, defined elsewhere. Each returns an owning pointer to a
// Writer; going by the names, one for Mach-O output and one for YAML output.
std::unique_ptr<Writer> createWriterMachO(const MachOLinkingContext &);
std::unique_ptr<Writer> createWriterYAML(const LinkingContext &);
} // end namespace lld

#endif
diff --git a/contrib/llvm/tools/lld/include/lld/Driver/Driver.h b/contrib/llvm/tools/lld/include/lld/Driver/Driver.h
new file mode 100644
index 000000000000..a3265c85716a
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Driver/Driver.h
@@ -0,0 +1,32 @@
//===- lld/Driver/Driver.h - Linker Driver Emulator -----------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_DRIVER_DRIVER_H
#define LLD_DRIVER_DRIVER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/raw_ostream.h"

// One link() entry point per driver flavor, each in its own namespace.
// All take the argument vector as an ArrayRef of C strings and return bool.
// NOTE(review): the meaning of the bool result is set by the definitions,
// which are not in this header -- presumably true on success; confirm.
namespace lld {
namespace coff {
bool link(llvm::ArrayRef<const char *> Args);
}

namespace elf {
// Diag is the stream handed to the driver for diagnostics; it defaults to
// llvm::errs(). CanExitEarly has no default and must be supplied by callers.
bool link(llvm::ArrayRef<const char *> Args, bool CanExitEarly,
          llvm::raw_ostream &Diag = llvm::errs());
}

namespace mach_o {
// Diag defaults to llvm::errs().
bool link(llvm::ArrayRef<const char *> Args,
          llvm::raw_ostream &Diag = llvm::errs());
}
}

#endif
diff --git a/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h b/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h
new file mode 100644
index 000000000000..a9e80f50b23d
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h
@@ -0,0 +1,508 @@
//===- lld/ReaderWriter/MachOLinkingContext.h -----------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H +#define LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" +#include <set> + +using llvm::MachO::HeaderFileType; + +namespace lld { + +namespace mach_o { +class ArchHandler; +class MachODylibFile; +class MachOFile; +class SectCreateFile; +} + +class MachOLinkingContext : public LinkingContext { +public: + MachOLinkingContext(); + ~MachOLinkingContext() override; + + enum Arch { + arch_unknown, + arch_ppc, + arch_x86, + arch_x86_64, + arch_armv6, + arch_armv7, + arch_armv7s, + arch_arm64, + }; + + enum class OS { + unknown, + macOSX, + iOS, + iOS_simulator + }; + + enum class ExportMode { + globals, // Default, all global symbols exported. + whiteList, // -exported_symbol[s_list], only listed symbols exported. + blackList // -unexported_symbol[s_list], no listed symbol exported. + }; + + enum class DebugInfoMode { + addDebugMap, // Default + noDebugMap // -S option + }; + + enum class UndefinedMode { + error, + warning, + suppress, + dynamicLookup + }; + + enum ObjCConstraint { + objc_unknown = 0, + objc_supports_gc = 2, + objc_gc_only = 4, + // Image optimized by dyld = 8 + // GC compaction = 16 + objc_retainReleaseForSimulator = 32, + objc_retainRelease + }; + + /// Initializes the context to sane default values given the specified output + /// file type, arch, os, and minimum os version. This should be called before + /// other setXXX() methods. 
+ void configure(HeaderFileType type, Arch arch, OS os, uint32_t minOSVersion, + bool exportDynamicSymbols); + + void addPasses(PassManager &pm) override; + bool validateImpl(raw_ostream &diagnostics) override; + std::string demangle(StringRef symbolName) const override; + + void createImplicitFiles(std::vector<std::unique_ptr<File>> &) override; + + /// Creates a new file which is owned by the context. Returns a pointer to + /// the new file. + template <class T, class... Args> + typename std::enable_if<!std::is_array<T>::value, T *>::type + make_file(Args &&... args) const { + auto file = std::unique_ptr<T>(new T(std::forward<Args>(args)...)); + auto *filePtr = file.get(); + auto *ctx = const_cast<MachOLinkingContext *>(this); + ctx->getNodes().push_back(llvm::make_unique<FileNode>(std::move(file))); + return filePtr; + } + + uint32_t getCPUType() const; + uint32_t getCPUSubType() const; + + bool addEntryPointLoadCommand() const; + bool addUnixThreadLoadCommand() const; + bool outputTypeHasEntry() const; + bool is64Bit() const; + + virtual uint64_t pageZeroSize() const { return _pageZeroSize; } + virtual uint64_t pageSize() const { return _pageSize; } + + mach_o::ArchHandler &archHandler() const; + + HeaderFileType outputMachOType() const { return _outputMachOType; } + + Arch arch() const { return _arch; } + StringRef archName() const { return nameFromArch(_arch); } + OS os() const { return _os; } + + ExportMode exportMode() const { return _exportMode; } + void setExportMode(ExportMode mode) { _exportMode = mode; } + void addExportSymbol(StringRef sym); + bool exportRestrictMode() const { return _exportMode != ExportMode::globals; } + bool exportSymbolNamed(StringRef sym) const; + + DebugInfoMode debugInfoMode() const { return _debugInfoMode; } + void setDebugInfoMode(DebugInfoMode mode) { + _debugInfoMode = mode; + } + + void appendOrderedSymbol(StringRef symbol, StringRef filename); + + bool keepPrivateExterns() const { return _keepPrivateExterns; } + void 
setKeepPrivateExterns(bool v) { _keepPrivateExterns = v; } + bool demangleSymbols() const { return _demangle; } + void setDemangleSymbols(bool d) { _demangle = d; } + bool mergeObjCCategories() const { return _mergeObjCCategories; } + void setMergeObjCCategories(bool v) { _mergeObjCCategories = v; } + /// Create file at specified path which will contain a binary encoding + /// of all input and output file paths. + std::error_code createDependencyFile(StringRef path); + void addInputFileDependency(StringRef path) const; + void addInputFileNotFound(StringRef path) const; + void addOutputFileDependency(StringRef path) const; + + bool minOS(StringRef mac, StringRef iOS) const; + void setDoNothing(bool value) { _doNothing = value; } + bool doNothing() const { return _doNothing; } + bool printAtoms() const { return _printAtoms; } + bool testingFileUsage() const { return _testingFileUsage; } + const StringRefVector &searchDirs() const { return _searchDirs; } + const StringRefVector &frameworkDirs() const { return _frameworkDirs; } + void setSysLibRoots(const StringRefVector &paths); + const StringRefVector &sysLibRoots() const { return _syslibRoots; } + bool PIE() const { return _pie; } + void setPIE(bool pie) { _pie = pie; } + bool generateVersionLoadCommand() const { + return _generateVersionLoadCommand; + } + void setGenerateVersionLoadCommand(bool v) { + _generateVersionLoadCommand = v; + } + + bool generateFunctionStartsLoadCommand() const { + return _generateFunctionStartsLoadCommand; + } + void setGenerateFunctionStartsLoadCommand(bool v) { + _generateFunctionStartsLoadCommand = v; + } + + bool generateDataInCodeLoadCommand() const { + return _generateDataInCodeLoadCommand; + } + void setGenerateDataInCodeLoadCommand(bool v) { + _generateDataInCodeLoadCommand = v; + } + + uint64_t stackSize() const { return _stackSize; } + void setStackSize(uint64_t stackSize) { _stackSize = stackSize; } + + uint64_t baseAddress() const { return _baseAddress; } + void 
setBaseAddress(uint64_t baseAddress) { _baseAddress = baseAddress; } + + ObjCConstraint objcConstraint() const { return _objcConstraint; } + + uint32_t osMinVersion() const { return _osMinVersion; } + + uint32_t sdkVersion() const { return _sdkVersion; } + void setSdkVersion(uint64_t v) { _sdkVersion = v; } + + uint64_t sourceVersion() const { return _sourceVersion; } + void setSourceVersion(uint64_t v) { _sourceVersion = v; } + + uint32_t swiftVersion() const { return _swiftVersion; } + + /// \brief Checks whether a given path on the filesystem exists. + /// + /// When running in -test_file_usage mode, this method consults an + /// internally maintained list of files that exist (provided by -path_exists) + /// instead of the actual filesystem. + bool pathExists(StringRef path) const; + + /// Like pathExists() but only used on files - not directories. + bool fileExists(StringRef path) const; + + /// \brief Adds any library search paths derived from the given base, possibly + /// modified by -syslibroots. + /// + /// The set of paths added consists of approximately all syslibroot-prepended + /// versions of libPath that exist, or the original libPath if there are none + /// for whatever reason. With various edge-cases for compatibility. + void addModifiedSearchDir(StringRef libPath, bool isSystemPath = false); + + /// \brief Determine whether -lFoo can be resolve within the given path, and + /// return the filename if so. + /// + /// The -lFoo option is documented to search for libFoo.dylib and libFoo.a in + /// that order, unless Foo ends in ".o", in which case only the exact file + /// matches (e.g. -lfoo.o would only find foo.o). + llvm::Optional<StringRef> searchDirForLibrary(StringRef path, + StringRef libName) const; + + /// \brief Iterates through all search path entries looking for libName (as + /// specified by -lFoo). + llvm::Optional<StringRef> searchLibrary(StringRef libName) const; + + /// Add a framework search path. 
Internally, this method may be prepended + /// the path with syslibroot. + void addFrameworkSearchDir(StringRef fwPath, bool isSystemPath = false); + + /// \brief Iterates through all framework directories looking for + /// Foo.framework/Foo (when fwName = "Foo"). + llvm::Optional<StringRef> findPathForFramework(StringRef fwName) const; + + /// \brief The dylib's binary compatibility version, in the raw uint32 format. + /// + /// When building a dynamic library, this is the compatibility version that + /// gets embedded into the result. Other Mach-O binaries that link against + /// this library will store the compatibility version in its load command. At + /// runtime, the loader will verify that the binary is compatible with the + /// installed dynamic library. + uint32_t compatibilityVersion() const { return _compatibilityVersion; } + + /// \brief The dylib's current version, in the the raw uint32 format. + /// + /// When building a dynamic library, this is the current version that gets + /// embedded into the result. Other Mach-O binaries that link against + /// this library will store the compatibility version in its load command. + uint32_t currentVersion() const { return _currentVersion; } + + /// \brief The dylib's install name. + /// + /// Binaries that link against the dylib will embed this path into the dylib + /// load command. When loading the binaries at runtime, this is the location + /// on disk that the loader will look for the dylib. + StringRef installName() const { return _installName; } + + /// \brief Whether or not the dylib has side effects during initialization. + /// + /// Dylibs marked as being dead strippable provide the guarantee that loading + /// the dylib has no side effects, allowing the linker to strip out the dylib + /// when linking a binary that does not use any of its symbols. + bool deadStrippableDylib() const { return _deadStrippableDylib; } + + /// \brief Whether or not to use flat namespace. 
+ /// + /// MachO usually uses a two-level namespace, where each external symbol + /// referenced by the target is associated with the dylib that will provide + /// the symbol's definition at runtime. Using flat namespace overrides this + /// behavior: the linker searches all dylibs on the command line and all + /// dylibs those original dylibs depend on, but does not record which dylib + /// an external symbol came from. At runtime dyld again searches all images + /// and uses the first definition it finds. In addition, any undefines in + /// loaded flat_namespace dylibs must be resolvable at build time. + bool useFlatNamespace() const { return _flatNamespace; } + + /// \brief How to handle undefined symbols. + /// + /// Options are: + /// * error: Report an error and terminate linking. + /// * warning: Report a warning, but continue linking. + /// * suppress: Ignore and continue linking. + /// * dynamic_lookup: For use with -twolevel namespace: Records source dylibs + /// for symbols that are defined in a linked dylib at static link time. + /// Undefined symbols are handled by searching all loaded images at + /// runtime. + UndefinedMode undefinedMode() const { return _undefinedMode; } + + /// \brief The path to the executable that will load the bundle at runtime. + /// + /// When building a Mach-O bundle, this executable will be examined if there + /// are undefined symbols after the main link phase. It is expected that this + /// binary will be loading the bundle at runtime and will provide the symbols + /// at that point. 
+ StringRef bundleLoader() const { return _bundleLoader; } + + void setCompatibilityVersion(uint32_t vers) { _compatibilityVersion = vers; } + void setCurrentVersion(uint32_t vers) { _currentVersion = vers; } + void setInstallName(StringRef name) { _installName = name; } + void setDeadStrippableDylib(bool deadStrippable) { + _deadStrippableDylib = deadStrippable; + } + void setUseFlatNamespace(bool flatNamespace) { + _flatNamespace = flatNamespace; + } + + void setUndefinedMode(UndefinedMode undefinedMode) { + _undefinedMode = undefinedMode; + } + + void setBundleLoader(StringRef loader) { _bundleLoader = loader; } + void setPrintAtoms(bool value=true) { _printAtoms = value; } + void setTestingFileUsage(bool value = true) { + _testingFileUsage = value; + } + void addExistingPathForDebug(StringRef path) { + _existingPaths.insert(path); + } + + void addRpath(StringRef rpath); + const StringRefVector &rpaths() const { return _rpaths; } + + /// Add section alignment constraint on final layout. + void addSectionAlignment(StringRef seg, StringRef sect, uint16_t align); + + /// \brief Add a section based on a command-line sectcreate option. + void addSectCreateSection(StringRef seg, StringRef sect, + std::unique_ptr<MemoryBuffer> content); + + /// Returns true if specified section had alignment constraints. + bool sectionAligned(StringRef seg, StringRef sect, uint16_t &align) const; + + StringRef dyldPath() const { return "/usr/lib/dyld"; } + + /// Stub creation Pass should be run. + bool needsStubsPass() const; + + // GOT creation Pass should be run. + bool needsGOTPass() const; + + /// Pass to add TLV sections. + bool needsTLVPass() const; + + /// Pass to transform __compact_unwind into __unwind_info should be run. + bool needsCompactUnwindPass() const; + + /// Pass to add shims switching between thumb and arm mode. + bool needsShimPass() const; + + /// Pass to add objc image info and optimized objc data. 
+ bool needsObjCPass() const; + + /// Magic symbol name stubs will need to help lazy bind. + StringRef binderSymbolName() const; + + /// Used to keep track of direct and indirect dylibs. + void registerDylib(mach_o::MachODylibFile *dylib, bool upward) const; + + // Reads a file from disk to memory. Returns only a needed chunk + // if a fat binary. + ErrorOr<std::unique_ptr<MemoryBuffer>> getMemoryBuffer(StringRef path); + + /// Used to find indirect dylibs. Instantiates a MachODylibFile if one + /// has not already been made for the requested dylib. Uses -L and -F + /// search paths to allow indirect dylibs to be overridden. + mach_o::MachODylibFile* findIndirectDylib(StringRef path); + + uint32_t dylibCurrentVersion(StringRef installName) const; + + uint32_t dylibCompatVersion(StringRef installName) const; + + ArrayRef<mach_o::MachODylibFile*> allDylibs() const { + return _allDylibs; + } + + /// Creates a copy (owned by this MachOLinkingContext) of a string. + StringRef copy(StringRef str) { return str.copy(_allocator); } + + /// If the memoryBuffer is a fat file with a slice for the current arch, + /// this method will return the offset and size of that slice. + bool sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, uint32_t &size); + + /// Returns if a command line option specified dylib is an upward link. + bool isUpwardDylib(StringRef installName) const; + + static bool isThinObjectFile(StringRef path, Arch &arch); + static Arch archFromCpuType(uint32_t cputype, uint32_t cpusubtype); + static Arch archFromName(StringRef archName); + static StringRef nameFromArch(Arch arch); + static uint32_t cpuTypeFromArch(Arch arch); + static uint32_t cpuSubtypeFromArch(Arch arch); + static bool is64Bit(Arch arch); + static bool isHostEndian(Arch arch); + static bool isBigEndian(Arch arch); + + /// Construct 32-bit value from string "X.Y.Z" where + /// bits are xxxx.yy.zz. 
Largest number is 65535.255.255 + static bool parsePackedVersion(StringRef str, uint32_t &result); + + /// Construct 64-bit value from string "A.B.C.D.E" where + /// bits are aaaa.bb.cc.dd.ee. Largest number is 16777215.1023.1023.1023.1023 + static bool parsePackedVersion(StringRef str, uint64_t &result); + + void finalizeInputFiles() override; + + llvm::Error handleLoadedFile(File &file) override; + + bool customAtomOrderer(const DefinedAtom *left, const DefinedAtom *right, + bool &leftBeforeRight) const; + + /// Return the 'flat namespace' file. This is the file that supplies + /// atoms for otherwise undefined symbols when the -flat_namespace or + /// -undefined dynamic_lookup options are used. + File* flatNamespaceFile() const { return _flatNamespaceFile; } + +private: + Writer &writer() const override; + mach_o::MachODylibFile* loadIndirectDylib(StringRef path); + void checkExportWhiteList(const DefinedAtom *atom) const; + void checkExportBlackList(const DefinedAtom *atom) const; + struct ArchInfo { + StringRef archName; + MachOLinkingContext::Arch arch; + bool littleEndian; + uint32_t cputype; + uint32_t cpusubtype; + }; + + struct SectionAlign { + StringRef segmentName; + StringRef sectionName; + uint16_t align; + }; + + struct OrderFileNode { + StringRef fileFilter; + unsigned order; + }; + + static bool findOrderOrdinal(const std::vector<OrderFileNode> &nodes, + const DefinedAtom *atom, unsigned &ordinal); + + static ArchInfo _s_archInfos[]; + + std::set<StringRef> _existingPaths; // For testing only. 
+ StringRefVector _searchDirs; + StringRefVector _syslibRoots; + StringRefVector _frameworkDirs; + HeaderFileType _outputMachOType = llvm::MachO::MH_EXECUTE; + bool _outputMachOTypeStatic = false; // Disambiguate static vs dynamic prog + bool _doNothing = false; // for -help and -v which just print info + bool _pie = false; + Arch _arch = arch_unknown; + OS _os = OS::macOSX; + uint32_t _osMinVersion = 0; + uint32_t _sdkVersion = 0; + uint64_t _sourceVersion = 0; + uint64_t _pageZeroSize = 0; + uint64_t _pageSize = 4096; + uint64_t _baseAddress = 0; + uint64_t _stackSize = 0; + uint32_t _compatibilityVersion = 0; + uint32_t _currentVersion = 0; + ObjCConstraint _objcConstraint = objc_unknown; + uint32_t _swiftVersion = 0; + StringRef _installName; + StringRefVector _rpaths; + bool _flatNamespace = false; + UndefinedMode _undefinedMode = UndefinedMode::error; + bool _deadStrippableDylib = false; + bool _printAtoms = false; + bool _testingFileUsage = false; + bool _keepPrivateExterns = false; + bool _demangle = false; + bool _mergeObjCCategories = true; + bool _generateVersionLoadCommand = false; + bool _generateFunctionStartsLoadCommand = false; + bool _generateDataInCodeLoadCommand = false; + StringRef _bundleLoader; + mutable std::unique_ptr<mach_o::ArchHandler> _archHandler; + mutable std::unique_ptr<Writer> _writer; + std::vector<SectionAlign> _sectAligns; + mutable llvm::StringMap<mach_o::MachODylibFile*> _pathToDylibMap; + mutable std::vector<mach_o::MachODylibFile*> _allDylibs; + mutable std::set<mach_o::MachODylibFile*> _upwardDylibs; + mutable std::vector<std::unique_ptr<File>> _indirectDylibs; + mutable std::mutex _dylibsMutex; + ExportMode _exportMode = ExportMode::globals; + llvm::StringSet<> _exportedSymbols; + DebugInfoMode _debugInfoMode = DebugInfoMode::addDebugMap; + std::unique_ptr<llvm::raw_fd_ostream> _dependencyInfo; + llvm::StringMap<std::vector<OrderFileNode>> _orderFiles; + unsigned _orderFileEntries = 0; + File *_flatNamespaceFile = nullptr; 
/// Context object handed to YAML I/O while lld reads or writes YAML.
///
/// When YAML I/O is used in lld, the yaml context always holds a YamlContext
/// object. lld has to support heterogeneous YAML documents, each of which
/// requires different context info, so this single struct carries the fields
/// needed by every client. All pointer members default to nullptr.
struct YamlContext {
  const LinkingContext *_ctx = nullptr;
  const Registry *_registry = nullptr;
  File *_file = nullptr;
  // NOTE(review): presumably only set while a normalized mach-o document is
  // being processed -- confirm against the mach-o YAML reader/writer.
  NormalizedFile *_normalizeMachOFile = nullptr;
  StringRef _path;
};
+struct SpecificAllocBase { + SpecificAllocBase() { Instances.push_back(this); } + virtual ~SpecificAllocBase() = default; + virtual void reset() = 0; + static std::vector<SpecificAllocBase *> Instances; +}; + +template <class T> struct SpecificAlloc : public SpecificAllocBase { + void reset() override { Alloc.DestroyAll(); } + llvm::SpecificBumpPtrAllocator<T> Alloc; +}; + +// Use this arena if your object has a destructor. +// Your destructor will be invoked from freeArena(). +template <typename T, typename... U> inline T *make(U &&... Args) { + static SpecificAlloc<T> Alloc; + return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...); +} + +inline void freeArena() { + for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances) + Alloc->reset(); + BAlloc.Reset(); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/lib/CMakeLists.txt b/contrib/llvm/tools/lld/lib/CMakeLists.txt new file mode 100644 index 000000000000..699f5e93f8af --- /dev/null +++ b/contrib/llvm/tools/lld/lib/CMakeLists.txt @@ -0,0 +1,4 @@ +add_subdirectory(Config) +add_subdirectory(Core) +add_subdirectory(Driver) +add_subdirectory(ReaderWriter) diff --git a/contrib/llvm/tools/lld/lib/Config/CMakeLists.txt b/contrib/llvm/tools/lld/lib/Config/CMakeLists.txt new file mode 100644 index 000000000000..3e142b66f578 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Config/CMakeLists.txt @@ -0,0 +1,9 @@ +add_lld_library(lldConfig + Version.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Config + + LINK_COMPONENTS + Support + ) diff --git a/contrib/llvm/tools/lld/lib/Config/Version.cpp b/contrib/llvm/tools/lld/lib/Config/Version.cpp new file mode 100644 index 000000000000..25544756f8be --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Config/Version.cpp @@ -0,0 +1,43 @@ +//===- lib/Config/Version.cpp - LLD Version Number ---------------*- C++-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several version-related utility functions for LLD. +// +//===----------------------------------------------------------------------===// + +#include "lld/Config/Version.h" + +using namespace llvm; + +// Returns an SVN repository path, which is usually "trunk". +static std::string getRepositoryPath() { + StringRef S = LLD_REPOSITORY_STRING; + size_t Pos = S.find("lld/"); + if (Pos != StringRef::npos) + return S.substr(Pos + 4); + return S; +} + +// Returns an SVN repository name, e.g., " (trunk 284614)" +// or an empty string if no repository info is available. +static std::string getRepository() { + std::string Repo = getRepositoryPath(); + std::string Rev = LLD_REVISION_STRING; + + if (Repo.empty() && Rev.empty()) + return ""; + if (!Repo.empty() && !Rev.empty()) + return " (" + Repo + " " + Rev + ")"; + return " (" + Repo + Rev + ")"; +} + +// Returns a version string, e.g., "LLD 4.0 (lld/trunk 284614)". 
+std::string lld::getLLDVersion() { + return "LLD " + std::string(LLD_VERSION_STRING) + getRepository(); +} diff --git a/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt b/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt new file mode 100644 index 000000000000..7f4c47f14b90 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt @@ -0,0 +1,17 @@ +add_lld_library(lldCore + DefinedAtom.cpp + Error.cpp + File.cpp + LinkingContext.cpp + Reader.cpp + Reproduce.cpp + Resolver.cpp + SymbolTable.cpp + Writer.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Core + + LINK_COMPONENTS + Support + ) diff --git a/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp b/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp new file mode 100644 index 000000000000..177cae7fcbf0 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp @@ -0,0 +1,82 @@ +//===- DefinedAtom.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ErrorHandling.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" + +namespace lld { + +DefinedAtom::ContentPermissions DefinedAtom::permissions() const { + // By default base permissions on content type. 
// Utility function for deriving permissions from content type.
//
// Maps each ContentType enumerator to the ContentPermissions value used for
// atoms of that type. The switch deliberately has no default label, so a
// content type missing from the table falls through to llvm_unreachable().
DefinedAtom::ContentPermissions DefinedAtom::permissions(ContentType type) {
  switch (type) {
  // Content types mapped to permR_X.
  case typeCode:
  case typeResolver:
  case typeBranchIsland:
  case typeBranchShim:
  case typeStub:
  case typeStubHelper:
  case typeMachHeader:
    return permR_X;

  // Content types mapped to permR__.
  case typeConstant:
  case typeCString:
  case typeUTF16String:
  case typeCFI:
  case typeLSDA:
  case typeLiteral4:
  case typeLiteral8:
  case typeLiteral16:
  case typeDTraceDOF:
  case typeCompactUnwindInfo:
  case typeProcessedUnwindInfo:
  case typeObjCImageInfo:
  case typeObjCMethodList:
    return permR__;

  // Content types mapped to permRW_.
  case typeData:
  case typeDataFast:
  case typeZeroFill:
  case typeZeroFillFast:
  case typeObjC1Class:
  case typeLazyPointer:
  case typeLazyDylibPointer:
  case typeNonLazyPointer:
  case typeThunkTLV:
    return permRW_;

  // Content types mapped to permRW_L.
  case typeGOT:
  case typeConstData:
  case typeCFString:
  case typeInitializerPtr:
  case typeTerminatorPtr:
  case typeCStringPtr:
  case typeObjCClassPtr:
  case typeObjC2CategoryList:
  case typeInterposingTuples:
  case typeTLVInitialData:
  case typeTLVInitialZeroFill:
  case typeTLVInitializerPtr:
    return permRW_L;

  // Content types for which no permissions can be derived from the type.
  case typeUnknown:
  case typeTempLTO:
  case typeSectCreate:
  case typeDSOHandle:
    return permUnknown;
  }
  llvm_unreachable("unknown content type");
}
/// Temporary class to enable make_dynamic_error_code() until
/// llvm::ErrorOr<> is updated to work with error encapsulations
/// other than error_code.
///
/// Every message passed to add() is appended to an internal vector and the
/// error value handed back is the index of that message. Index zero is
/// reserved for the "Success" entry, which is created lazily by the first
/// call to add().
///
/// Both add() and message() take the same mutex: add() may reallocate the
/// vector, so an unsynchronized message() could otherwise read freed storage.
class dynamic_error_category : public std::error_category {
public:
  ~dynamic_error_category() override = default;

  const char *name() const noexcept override {
    return "lld.dynamic_error";
  }

  /// Returns a copy of the message registered under \p ev.
  /// \p ev must be a value previously returned by add(), or zero once at
  /// least one message has been added.
  std::string message(int ev) const override {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    assert(ev >= 0);
    assert(ev < static_cast<int>(_messages.size()));
    // The value is an index into the string vector.
    return _messages[ev];
  }

  /// Registers \p msg and returns the error value that maps back to it.
  int add(std::string msg) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Value zero is always the success value.
    if (_messages.empty())
      _messages.push_back("Success");
    _messages.push_back(msg);
    // Return the index of the string just appended.
    return _messages.size() - 1;
  }

private:
  std::vector<std::string> _messages;
  // mutable so that the const message() override can lock it.
  mutable std::recursive_mutex _mutex;
};
Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/File.h" +#include "lld/Core/Node.h" +#include "lld/Core/Simple.h" +#include "lld/Core/Writer.h" +#include <algorithm> + +namespace lld { + +LinkingContext::LinkingContext() = default; + +LinkingContext::~LinkingContext() = default; + +bool LinkingContext::validate(raw_ostream &diagnostics) { + return validateImpl(diagnostics); +} + +llvm::Error LinkingContext::writeFile(const File &linkedFile) const { + return this->writer().writeFile(linkedFile, _outputPath); +} + +std::unique_ptr<File> LinkingContext::createEntrySymbolFile() const { + return createEntrySymbolFile("<command line option -e>"); +} + +std::unique_ptr<File> +LinkingContext::createEntrySymbolFile(StringRef filename) const { + if (entrySymbolName().empty()) + return nullptr; + std::unique_ptr<SimpleFile> entryFile(new SimpleFile(filename, + File::kindEntryObject)); + entryFile->addAtom( + *(new (_allocator) SimpleUndefinedAtom(*entryFile, entrySymbolName()))); + return std::move(entryFile); +} + +std::unique_ptr<File> LinkingContext::createUndefinedSymbolFile() const { + return createUndefinedSymbolFile("<command line option -u or --defsym>"); +} + +std::unique_ptr<File> +LinkingContext::createUndefinedSymbolFile(StringRef filename) const { + if (_initialUndefinedSymbols.empty()) + return nullptr; + std::unique_ptr<SimpleFile> undefinedSymFile( + new SimpleFile(filename, File::kindUndefinedSymsObject)); + for (StringRef undefSym : _initialUndefinedSymbols) + undefinedSymFile->addAtom(*(new (_allocator) SimpleUndefinedAtom( + *undefinedSymFile, undefSym))); + return std::move(undefinedSymFile); +} + +void LinkingContext::createInternalFiles( + std::vector<std::unique_ptr<File>> &result) const { + if (std::unique_ptr<File> file = createEntrySymbolFile()) + result.push_back(std::move(file)); + if 
(std::unique_ptr<File> file = createUndefinedSymbolFile()) + result.push_back(std::move(file)); +} + +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/Core/Reader.cpp b/contrib/llvm/tools/lld/lib/Core/Reader.cpp new file mode 100644 index 000000000000..24652abec688 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Core/Reader.cpp @@ -0,0 +1,110 @@ +//===- lib/Core/Reader.cpp ------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Reader.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <memory> + +namespace lld { + +YamlIOTaggedDocumentHandler::~YamlIOTaggedDocumentHandler() = default; + +void Registry::add(std::unique_ptr<Reader> reader) { + _readers.push_back(std::move(reader)); +} + +void Registry::add(std::unique_ptr<YamlIOTaggedDocumentHandler> handler) { + _yamlHandlers.push_back(std::move(handler)); +} + +ErrorOr<std::unique_ptr<File>> +Registry::loadFile(std::unique_ptr<MemoryBuffer> mb) const { + // Get file magic. + StringRef content(mb->getBufferStart(), mb->getBufferSize()); + llvm::sys::fs::file_magic fileType = llvm::sys::fs::identify_magic(content); + + // Ask each registered reader if it can handle this file type or extension. + for (const std::unique_ptr<Reader> &reader : _readers) { + if (!reader->canParse(fileType, mb->getMemBufferRef())) + continue; + return reader->loadFile(std::move(mb), *this); + } + + // No Reader could parse this file. 
+ return make_error_code(llvm::errc::executable_format_error); +} + +static const Registry::KindStrings kindStrings[] = { + {Reference::kindLayoutAfter, "layout-after"}, + {Reference::kindAssociate, "associate"}, + LLD_KIND_STRING_END}; + +Registry::Registry() { + addKindTable(Reference::KindNamespace::all, Reference::KindArch::all, + kindStrings); +} + +bool Registry::handleTaggedDoc(llvm::yaml::IO &io, + const lld::File *&file) const { + for (const std::unique_ptr<YamlIOTaggedDocumentHandler> &h : _yamlHandlers) + if (h->handledDocTag(io, file)) + return true; + return false; +} + +void Registry::addKindTable(Reference::KindNamespace ns, + Reference::KindArch arch, + const KindStrings array[]) { + KindEntry entry = { ns, arch, array }; + _kindEntries.push_back(entry); +} + +bool Registry::referenceKindFromString(StringRef inputStr, + Reference::KindNamespace &ns, + Reference::KindArch &arch, + Reference::KindValue &value) const { + for (const KindEntry &entry : _kindEntries) { + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (!inputStr.equals(pair->name)) + continue; + ns = entry.ns; + arch = entry.arch; + value = pair->value; + return true; + } + } + return false; +} + +bool Registry::referenceKindToString(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue value, + StringRef &str) const { + for (const KindEntry &entry : _kindEntries) { + if (entry.ns != ns) + continue; + if (entry.arch != arch) + continue; + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (pair->value != value) + continue; + str = pair->name; + return true; + } + } + return false; +} + +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/Core/Reproduce.cpp b/contrib/llvm/tools/lld/lib/Core/Reproduce.cpp new file mode 100644 index 000000000000..e3629a93cbe3 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Core/Reproduce.cpp @@ -0,0 +1,66 @@ +//===- Reproduce.cpp - Utilities for creating 
reproducers -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Reproduce.h" +#include "llvm/Option/Arg.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" + +using namespace lld; +using namespace llvm; +using namespace llvm::sys; + +// Makes a given pathname an absolute path first, and then remove +// beginning /. For example, "../foo.o" is converted to "home/john/foo.o", +// assuming that the current directory is "/home/john/bar". +// Returned string is a forward slash separated path even on Windows to avoid +// a mess with backslash-as-escape and backslash-as-path-separator. +std::string lld::relativeToRoot(StringRef Path) { + SmallString<128> Abs = Path; + if (fs::make_absolute(Abs)) + return Path; + path::remove_dots(Abs, /*remove_dot_dot=*/true); + + // This is Windows specific. root_name() returns a drive letter + // (e.g. "c:") or a UNC name (//net). We want to keep it as part + // of the result. + SmallString<128> Res; + StringRef Root = path::root_name(Abs); + if (Root.endswith(":")) + Res = Root.drop_back(); + else if (Root.startswith("//")) + Res = Root.substr(2); + + path::append(Res, path::relative_path(Abs)); + return path::convert_to_slash(Res); +} + +// Quote a given string if it contains a space character. 
+std::string lld::quote(StringRef S) { + if (S.find(' ') == StringRef::npos) + return S; + return ("\"" + S + "\"").str(); +} + +std::string lld::rewritePath(StringRef S) { + if (fs::exists(S)) + return relativeToRoot(S); + return S; +} + +std::string lld::toString(opt::Arg *Arg) { + std::string K = Arg->getSpelling(); + if (Arg->getNumValues() == 0) + return K; + std::string V = quote(Arg->getValue()); + if (Arg->getOption().getRenderStyle() == opt::Option::RenderJoinedStyle) + return K + V; + return K + " " + V; +} diff --git a/contrib/llvm/tools/lld/lib/Core/Resolver.cpp b/contrib/llvm/tools/lld/lib/Core/Resolver.cpp new file mode 100644 index 000000000000..e7cfaaac7835 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Core/Resolver.cpp @@ -0,0 +1,505 @@ +//===- Core/Resolver.cpp - Resolves Atom References -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Atom.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/SymbolTable.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <utility> +#include <vector> + +namespace lld { + +llvm::Expected<bool> Resolver::handleFile(File &file) { + if (auto ec = _ctx.handleLoadedFile(file)) + return std::move(ec); + bool undefAdded = false; + for (auto &atom : file.defined().owning_ptrs()) + doDefinedAtom(std::move(atom)); + for (auto 
&atom : file.undefined().owning_ptrs()) { + if (doUndefinedAtom(std::move(atom))) + undefAdded = true; + } + for (auto &atom : file.sharedLibrary().owning_ptrs()) + doSharedLibraryAtom(std::move(atom)); + for (auto &atom : file.absolute().owning_ptrs()) + doAbsoluteAtom(std::move(atom)); + return undefAdded; +} + +llvm::Expected<bool> Resolver::forEachUndefines(File &file, + UndefCallback callback) { + size_t i = _undefineIndex[&file]; + bool undefAdded = false; + do { + for (; i < _undefines.size(); ++i) { + StringRef undefName = _undefines[i]; + if (undefName.empty()) + continue; + const Atom *atom = _symbolTable.findByName(undefName); + if (!isa<UndefinedAtom>(atom) || _symbolTable.isCoalescedAway(atom)) { + // The symbol was resolved by some other file. Cache the result. + _undefines[i] = ""; + continue; + } + auto undefAddedOrError = callback(undefName); + if (auto ec = undefAddedOrError.takeError()) + return std::move(ec); + undefAdded |= undefAddedOrError.get(); + } + } while (i < _undefines.size()); + _undefineIndex[&file] = i; + return undefAdded; +} + +llvm::Expected<bool> Resolver::handleArchiveFile(File &file) { + ArchiveLibraryFile *archiveFile = cast<ArchiveLibraryFile>(&file); + return forEachUndefines(file, + [&](StringRef undefName) -> llvm::Expected<bool> { + if (File *member = archiveFile->find(undefName)) { + member->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + return handleFile(*member); + } + return false; + }); +} + +llvm::Error Resolver::handleSharedLibrary(File &file) { + // Add all the atoms from the shared library + SharedLibraryFile *sharedLibrary = cast<SharedLibraryFile>(&file); + auto undefAddedOrError = handleFile(*sharedLibrary); + if (auto ec = undefAddedOrError.takeError()) + return ec; + undefAddedOrError = + forEachUndefines(file, [&](StringRef undefName) -> llvm::Expected<bool> { + auto atom = sharedLibrary->exports(undefName); + if (atom.get()) + doSharedLibraryAtom(std::move(atom)); + return false; + }); + + if (auto ec = 
undefAddedOrError.takeError()) + return ec; + return llvm::Error::success(); +} + +bool Resolver::doUndefinedAtom(OwningAtomPtr<UndefinedAtom> atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " UndefinedAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" << atom.get()->name() << "\n"); + + // tell symbol table + bool newUndefAdded = _symbolTable.add(*atom.get()); + if (newUndefAdded) + _undefines.push_back(atom.get()->name()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr<Atom>(atom.release())); + + return newUndefAdded; +} + +// Called on each atom when a file is added. Returns true if a given +// atom is added to the symbol table. +void Resolver::doDefinedAtom(OwningAtomPtr<DefinedAtom> atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " DefinedAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", file=#" + << atom.get()->file().ordinal() + << ", atom=#" + << atom.get()->ordinal() + << ", name=" + << atom.get()->name() + << ", type=" + << atom.get()->contentType() + << "\n"); + + // An atom that should never be dead-stripped is a dead-strip root. 
+ if (_ctx.deadStrip() && + atom.get()->deadStrip() == DefinedAtom::deadStripNever) { + _deadStripRoots.insert(atom.get()); + } + + // add to list of known atoms + _symbolTable.add(*atom.get()); + _atoms.push_back(OwningAtomPtr<Atom>(atom.release())); +} + +void Resolver::doSharedLibraryAtom(OwningAtomPtr<SharedLibraryAtom> atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " SharedLibraryAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" + << atom.get()->name() + << "\n"); + + // tell symbol table + _symbolTable.add(*atom.get()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr<Atom>(atom.release())); +} + +void Resolver::doAbsoluteAtom(OwningAtomPtr<AbsoluteAtom> atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " AbsoluteAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" + << atom.get()->name() + << "\n"); + + // tell symbol table + if (atom.get()->scope() != Atom::scopeTranslationUnit) + _symbolTable.add(*atom.get()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr<Atom>(atom.release())); +} + +// Returns true if at least one of N previous files has created an +// undefined symbol. +bool Resolver::undefinesAdded(int begin, int end) { + std::vector<std::unique_ptr<Node>> &inputs = _ctx.getNodes(); + for (int i = begin; i < end; ++i) + if (FileNode *node = dyn_cast<FileNode>(inputs[i].get())) + if (_newUndefinesAdded[node->getFile()]) + return true; + return false; +} + +File *Resolver::getFile(int &index) { + std::vector<std::unique_ptr<Node>> &inputs = _ctx.getNodes(); + if ((size_t)index >= inputs.size()) + return nullptr; + if (GroupEnd *group = dyn_cast<GroupEnd>(inputs[index].get())) { + // We are at the end of the current group. If one or more new + // undefined atom has been added in the last groupSize files, we + // reiterate over the files. 
+ int size = group->getSize(); + if (undefinesAdded(index - size, index)) { + index -= size; + return getFile(index); + } + ++index; + return getFile(index); + } + return cast<FileNode>(inputs[index++].get())->getFile(); +} + +// Keep adding atoms until _ctx.getNextFile() returns an error. This +// function is where undefined atoms are resolved. +bool Resolver::resolveUndefines() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Resolving undefines:\n"); + ScopedTask task(getDefaultDomain(), "resolveUndefines"); + int index = 0; + std::set<File *> seen; + for (;;) { + bool undefAdded = false; + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "Loading file #" << index << "\n"); + File *file = getFile(index); + if (!file) + return true; + if (std::error_code ec = file->parse()) { + llvm::errs() << "Cannot open " + file->path() + << ": " << ec.message() << "\n"; + return false; + } + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "Loaded file: " << file->path() << "\n"); + switch (file->kind()) { + case File::kindErrorObject: + case File::kindNormalizedObject: + case File::kindMachObject: + case File::kindCEntryObject: + case File::kindHeaderObject: + case File::kindEntryObject: + case File::kindUndefinedSymsObject: + case File::kindStubHelperObject: + case File::kindResolverMergedObject: + case File::kindSectCreateObject: { + // The same file may be visited more than once if the file is + // in --start-group and --end-group. Only library files should + // be processed more than once. + if (seen.count(file)) + break; + seen.insert(file); + assert(!file->hasOrdinal()); + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + auto undefAddedOrError = handleFile(*file); + if (auto EC = undefAddedOrError.takeError()) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. 
+ llvm::errs() << "Error in " + file->path() << ": "; + logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); + return false; + } + undefAdded = undefAddedOrError.get(); + break; + } + case File::kindArchiveLibrary: { + if (!file->hasOrdinal()) + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + auto undefAddedOrError = handleArchiveFile(*file); + if (auto EC = undefAddedOrError.takeError()) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + llvm::errs() << "Error in " + file->path() << ": "; + logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); + return false; + } + undefAdded = undefAddedOrError.get(); + break; + } + case File::kindSharedLibrary: + if (!file->hasOrdinal()) + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + if (auto EC = handleSharedLibrary(*file)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + llvm::errs() << "Error in " + file->path() << ": "; + logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); + return false; + } + break; + } + _newUndefinesAdded[file] = undefAdded; + } +} + +// switch all references to undefined or coalesced away atoms +// to the new defined atom +void Resolver::updateReferences() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Updating references:\n"); + ScopedTask task(getDefaultDomain(), "updateReferences"); + for (const OwningAtomPtr<Atom> &atom : _atoms) { + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom.get())) { + for (const Reference *ref : *defAtom) { + // A reference of type kindAssociate should't be updated. + // Instead, an atom having such reference will be removed + // if the target atom is coalesced away, so that they will + // go away as a group. 
+ if (ref->kindNamespace() == lld::Reference::KindNamespace::all && + ref->kindValue() == lld::Reference::kindAssociate) { + if (_symbolTable.isCoalescedAway(atom.get())) + _deadAtoms.insert(ref->target()); + continue; + } + const Atom *newTarget = _symbolTable.replacement(ref->target()); + const_cast<Reference *>(ref)->setTarget(newTarget); + } + } + } +} + +// For dead code stripping, recursively mark atoms "live" +void Resolver::markLive(const Atom *atom) { + // Mark the atom is live. If it's already marked live, then stop recursion. + auto exists = _liveAtoms.insert(atom); + if (!exists.second) + return; + + // Mark all atoms it references as live + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) { + for (const Reference *ref : *defAtom) + markLive(ref->target()); + for (auto &p : llvm::make_range(_reverseRef.equal_range(defAtom))) { + const Atom *target = p.second; + markLive(target); + } + } +} + +static bool isBackref(const Reference *ref) { + if (ref->kindNamespace() != lld::Reference::KindNamespace::all) + return false; + return (ref->kindValue() == lld::Reference::kindLayoutAfter); +} + +// remove all atoms not actually used +void Resolver::deadStripOptimize() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Dead stripping unused atoms:\n"); + ScopedTask task(getDefaultDomain(), "deadStripOptimize"); + // only do this optimization with -dead_strip + if (!_ctx.deadStrip()) + return; + + // Some type of references prevent referring atoms to be dead-striped. + // Make a reverse map of such references before traversing the graph. + // While traversing the list of atoms, mark AbsoluteAtoms as live + // in order to avoid reclaim. 
+ for (const OwningAtomPtr<Atom> &atom : _atoms) { + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom.get())) + for (const Reference *ref : *defAtom) + if (isBackref(ref)) + _reverseRef.insert(std::make_pair(ref->target(), atom.get())); + if (const AbsoluteAtom *absAtom = dyn_cast<AbsoluteAtom>(atom.get())) + markLive(absAtom); + } + + // By default, shared libraries are built with all globals as dead strip roots + if (_ctx.globalsAreDeadStripRoots()) + for (const OwningAtomPtr<Atom> &atom : _atoms) + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom.get())) + if (defAtom->scope() == DefinedAtom::scopeGlobal) + _deadStripRoots.insert(defAtom); + + // Or, use list of names that are dead strip roots. + for (const StringRef &name : _ctx.deadStripRoots()) { + const Atom *symAtom = _symbolTable.findByName(name); + assert(symAtom); + _deadStripRoots.insert(symAtom); + } + + // mark all roots as live, and recursively all atoms they reference + for (const Atom *dsrAtom : _deadStripRoots) + markLive(dsrAtom); + + // now remove all non-live atoms from _atoms + _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(), + [&](OwningAtomPtr<Atom> &a) { + return _liveAtoms.count(a.get()) == 0; + }), + _atoms.end()); +} + +// error out if some undefines remain +bool Resolver::checkUndefines() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Checking for undefines:\n"); + + // build vector of remaining undefined symbols + std::vector<const UndefinedAtom *> undefinedAtoms = _symbolTable.undefines(); + if (_ctx.deadStrip()) { + // When dead code stripping, we don't care if dead atoms are undefined. + undefinedAtoms.erase( + std::remove_if(undefinedAtoms.begin(), undefinedAtoms.end(), + [&](const Atom *a) { return _liveAtoms.count(a) == 0; }), + undefinedAtoms.end()); + } + + if (undefinedAtoms.empty()) + return false; + + // Warn about unresolved symbols. 
+ bool foundUndefines = false; + for (const UndefinedAtom *undef : undefinedAtoms) { + // Skip over a weak symbol. + if (undef->canBeNull() != UndefinedAtom::canBeNullNever) + continue; + + // If this is a library and undefined symbols are allowed on the + // target platform, skip over it. + if (isa<SharedLibraryFile>(undef->file()) && _ctx.allowShlibUndefines()) + continue; + + // If the undefine is coalesced away, skip over it. + if (_symbolTable.isCoalescedAway(undef)) + continue; + + // Seems like this symbol is undefined. Warn that. + foundUndefines = true; + if (_ctx.printRemainingUndefines()) { + llvm::errs() << "Undefined symbol: " << undef->file().path() + << ": " << _ctx.demangle(undef->name()) + << "\n"; + } + } + if (!foundUndefines) + return false; + if (_ctx.printRemainingUndefines()) + llvm::errs() << "symbol(s) not found\n"; + return true; +} + +// remove from _atoms all coaleseced away atoms +void Resolver::removeCoalescedAwayAtoms() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Removing coalesced away atoms:\n"); + ScopedTask task(getDefaultDomain(), "removeCoalescedAwayAtoms"); + _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(), + [&](OwningAtomPtr<Atom> &a) { + return _symbolTable.isCoalescedAway(a.get()) || + _deadAtoms.count(a.get()); + }), + _atoms.end()); +} + +bool Resolver::resolve() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Resolving atom references:\n"); + if (!resolveUndefines()) + return false; + updateReferences(); + deadStripOptimize(); + if (checkUndefines()) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Found undefines... 
"); + if (!_ctx.allowRemainingUndefines()) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we don't allow\n"); + return false; + } + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we are ok with\n"); + } + removeCoalescedAwayAtoms(); + _result->addAtoms(_atoms); + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "******** Finished resolver\n"); + return true; +} + +void Resolver::MergedFile::addAtoms( + llvm::MutableArrayRef<OwningAtomPtr<Atom>> all) { + ScopedTask task(getDefaultDomain(), "addAtoms"); + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Resolver final atom list:\n"); + + for (OwningAtomPtr<Atom> &atom : all) { +#ifndef NDEBUG + if (auto *definedAtom = dyn_cast<DefinedAtom>(atom.get())) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << llvm::format(" 0x%09lX", definedAtom) + << ", file=#" + << definedAtom->file().ordinal() + << ", atom=#" + << definedAtom->ordinal() + << ", name=" + << definedAtom->name() + << ", type=" + << definedAtom->contentType() + << "\n"); + } else { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << llvm::format(" 0x%09lX", atom.get()) + << ", name=" + << atom.get()->name() + << "\n"); + } +#endif + addAtom(*atom.release()); + } +} + +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp b/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp new file mode 100644 index 000000000000..cacea5f30847 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp @@ -0,0 +1,291 @@ +//===- Core/SymbolTable.cpp - Main Symbol Table ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/SymbolTable.h" +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdlib> +#include <vector> + +namespace lld { +bool SymbolTable::add(const UndefinedAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const SharedLibraryAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const AbsoluteAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const DefinedAtom &atom) { + if (!atom.name().empty() && + atom.scope() != DefinedAtom::scopeTranslationUnit) { + // Named atoms cannot be merged by content. + assert(atom.merge() != DefinedAtom::mergeByContent); + // Track named atoms that are not scoped to file (static). + return addByName(atom); + } + if (atom.merge() == DefinedAtom::mergeByContent) { + // Named atoms cannot be merged by content. + assert(atom.name().empty()); + // Currently only read-only constants can be merged. + if (atom.permissions() == DefinedAtom::permR__) + return addByContent(atom); + // TODO: support mergeByContent of data atoms by comparing content & fixups. 
+ } + return false; +} + +enum NameCollisionResolution { + NCR_First, + NCR_Second, + NCR_DupDef, + NCR_DupUndef, + NCR_DupShLib, + NCR_Error +}; + +static NameCollisionResolution cases[4][4] = { + //regular absolute undef sharedLib + { + // first is regular + NCR_DupDef, NCR_Error, NCR_First, NCR_First + }, + { + // first is absolute + NCR_Error, NCR_Error, NCR_First, NCR_First + }, + { + // first is undef + NCR_Second, NCR_Second, NCR_DupUndef, NCR_Second + }, + { + // first is sharedLib + NCR_Second, NCR_Second, NCR_First, NCR_DupShLib + } +}; + +static NameCollisionResolution collide(Atom::Definition first, + Atom::Definition second) { + return cases[first][second]; +} + +enum MergeResolution { + MCR_First, + MCR_Second, + MCR_Largest, + MCR_SameSize, + MCR_Error +}; + +static MergeResolution mergeCases[][6] = { + // no tentative weak weakAddress sameNameAndSize largest + {MCR_Error, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // no + {MCR_Second, MCR_Largest, MCR_Second, MCR_Second, MCR_SameSize, MCR_Largest}, // tentative + {MCR_Second, MCR_First, MCR_First, MCR_Second, MCR_SameSize, MCR_Largest}, // weak + {MCR_Second, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // weakAddress + {MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize}, // sameSize + {MCR_Largest, MCR_Largest, MCR_Largest, MCR_Largest, MCR_SameSize, MCR_Largest}, // largest +}; + +static MergeResolution mergeSelect(DefinedAtom::Merge first, + DefinedAtom::Merge second) { + assert(first != DefinedAtom::mergeByContent); + assert(second != DefinedAtom::mergeByContent); + return mergeCases[first][second]; +} + +bool SymbolTable::addByName(const Atom &newAtom) { + StringRef name = newAtom.name(); + assert(!name.empty()); + const Atom *existing = findByName(name); + if (existing == nullptr) { + // Name is not in symbol table yet, add it associate with this atom. 
+ _nameTable[name] = &newAtom; + return true; + } + + // Do nothing if the same object is added more than once. + if (existing == &newAtom) + return false; + + // Name is already in symbol table and associated with another atom. + bool useNew = true; + switch (collide(existing->definition(), newAtom.definition())) { + case NCR_First: + useNew = false; + break; + case NCR_Second: + useNew = true; + break; + case NCR_DupDef: { + const auto *existingDef = cast<DefinedAtom>(existing); + const auto *newDef = cast<DefinedAtom>(&newAtom); + switch (mergeSelect(existingDef->merge(), newDef->merge())) { + case MCR_First: + useNew = false; + break; + case MCR_Second: + useNew = true; + break; + case MCR_Largest: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + useNew = (newSize >= existingSize); + break; + } + case MCR_SameSize: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + if (existingSize == newSize) { + useNew = true; + break; + } + llvm::errs() << "Size mismatch: " + << existing->name() << " (" << existingSize << ") " + << newAtom.name() << " (" << newSize << ")\n"; + // fallthrough + } + case MCR_Error: + llvm::errs() << "Duplicate symbols: " + << existing->name() + << ":" + << existing->file().path() + << " and " + << newAtom.name() + << ":" + << newAtom.file().path() + << "\n"; + llvm::report_fatal_error("duplicate symbol error"); + break; + } + break; + } + case NCR_DupUndef: { + const UndefinedAtom* existingUndef = cast<UndefinedAtom>(existing); + const UndefinedAtom* newUndef = cast<UndefinedAtom>(&newAtom); + + bool sameCanBeNull = (existingUndef->canBeNull() == newUndef->canBeNull()); + if (sameCanBeNull) + useNew = false; + else + useNew = (newUndef->canBeNull() < existingUndef->canBeNull()); + break; + } + case NCR_DupShLib: { + useNew = false; + break; + } + case NCR_Error: + llvm::errs() << "SymbolTable: error while merging " << name << "\n"; + 
llvm::report_fatal_error("duplicate symbol error"); + break; + } + + if (useNew) { + // Update name table to use new atom. + _nameTable[name] = &newAtom; + // Add existing atom to replacement table. + _replacedAtoms[existing] = &newAtom; + } else { + // New atom is not being used. Add it to replacement table. + _replacedAtoms[&newAtom] = existing; + } + return false; +} + +unsigned SymbolTable::AtomMappingInfo::getHashValue(const DefinedAtom *atom) { + auto content = atom->rawContent(); + return llvm::hash_combine(atom->size(), + atom->contentType(), + llvm::hash_combine_range(content.begin(), + content.end())); +} + +bool SymbolTable::AtomMappingInfo::isEqual(const DefinedAtom * const l, + const DefinedAtom * const r) { + if (l == r) + return true; + if (l == getEmptyKey() || r == getEmptyKey()) + return false; + if (l == getTombstoneKey() || r == getTombstoneKey()) + return false; + if (l->contentType() != r->contentType()) + return false; + if (l->size() != r->size()) + return false; + if (l->sectionChoice() != r->sectionChoice()) + return false; + if (l->sectionChoice() == DefinedAtom::sectionCustomRequired) { + if (!l->customSectionName().equals(r->customSectionName())) + return false; + } + ArrayRef<uint8_t> lc = l->rawContent(); + ArrayRef<uint8_t> rc = r->rawContent(); + return memcmp(lc.data(), rc.data(), lc.size()) == 0; +} + +bool SymbolTable::addByContent(const DefinedAtom &newAtom) { + AtomContentSet::iterator pos = _contentTable.find(&newAtom); + if (pos == _contentTable.end()) { + _contentTable.insert(&newAtom); + return true; + } + const Atom* existing = *pos; + // New atom is not being used. Add it to replacement table. 
+ _replacedAtoms[&newAtom] = existing; + return false; +} + +const Atom *SymbolTable::findByName(StringRef sym) { + NameToAtom::iterator pos = _nameTable.find(sym); + if (pos == _nameTable.end()) + return nullptr; + return pos->second; +} + +const Atom *SymbolTable::replacement(const Atom *atom) { + // Find the replacement for a given atom. Atoms in _replacedAtoms + // may be chained, so find the last one. + for (;;) { + AtomToAtom::iterator pos = _replacedAtoms.find(atom); + if (pos == _replacedAtoms.end()) + return atom; + atom = pos->second; + } +} + +bool SymbolTable::isCoalescedAway(const Atom *atom) { + return _replacedAtoms.count(atom) > 0; +} + +std::vector<const UndefinedAtom *> SymbolTable::undefines() { + std::vector<const UndefinedAtom *> ret; + for (auto it : _nameTable) { + const Atom *atom = it.second; + assert(atom != nullptr); + if (const auto *undef = dyn_cast<const UndefinedAtom>(atom)) + if (_replacedAtoms.count(undef) == 0) + ret.push_back(undef); + } + return ret; +} + +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/Core/Writer.cpp b/contrib/llvm/tools/lld/lib/Core/Writer.cpp new file mode 100644 index 000000000000..51f95bc5053a --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Core/Writer.cpp @@ -0,0 +1,18 @@ +//===- lib/Core/Writer.cpp ------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Writer.h" + +namespace lld { + +Writer::Writer() = default; + +Writer::~Writer() = default; + +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/Driver/CMakeLists.txt b/contrib/llvm/tools/lld/lib/Driver/CMakeLists.txt new file mode 100644 index 000000000000..be75872869e6 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Driver/CMakeLists.txt @@ -0,0 +1,24 @@ +set(LLVM_TARGET_DEFINITIONS DarwinLdOptions.td) +tablegen(LLVM DarwinLdOptions.inc -gen-opt-parser-defs) +add_public_tablegen_target(DriverOptionsTableGen) + +add_lld_library(lldDriver + DarwinLdDriver.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Driver + + LINK_COMPONENTS + Object + Option + Support + + LINK_LIBS + lldConfig + lldMachO + lldCore + lldReaderWriter + lldYAML + ) + +add_dependencies(lldDriver DriverOptionsTableGen) diff --git a/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp b/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp new file mode 100644 index 000000000000..9b4aede19aa2 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp @@ -0,0 +1,1237 @@ +//===- lib/Driver/DarwinLdDriver.cpp --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Concrete instance of the Driver for darwin's ld. 
+/// +//===----------------------------------------------------------------------===// + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/Error.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Node.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/Simple.h" +#include "lld/Core/LinkingContext.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <memory> +#include <string> +#include <system_error> +#include <utility> +#include <vector> + +using namespace lld; + +namespace { + +// Create enum with OPT_xxx values for each option in DarwinLdOptions.td +enum { + OPT_INVALID = 0, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, META) \ + OPT_##ID, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create prefix string literals used in DarwinLdOptions.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "DarwinLdOptions.inc" +#undef PREFIX + +// Create table mapping all options defined in DarwinLdOptions.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(PREFIX, 
NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS }, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create OptTable class for parsing actual command line arguments +class DarwinLdOptTable : public llvm::opt::OptTable { +public: + DarwinLdOptTable() : OptTable(infoTable) {} +}; + +static std::vector<std::unique_ptr<File>> +makeErrorFile(StringRef path, std::error_code ec) { + std::vector<std::unique_ptr<File>> result; + result.push_back(llvm::make_unique<ErrorFile>(path, ec)); + return result; +} + +static std::vector<std::unique_ptr<File>> +parseMemberFiles(std::unique_ptr<File> file) { + std::vector<std::unique_ptr<File>> members; + if (auto *archive = dyn_cast<ArchiveLibraryFile>(file.get())) { + if (std::error_code ec = archive->parseAllMembers(members)) + return makeErrorFile(file->path(), ec); + } else { + members.push_back(std::move(file)); + } + return members; +} + +std::vector<std::unique_ptr<File>> +loadFile(MachOLinkingContext &ctx, StringRef path, + raw_ostream &diag, bool wholeArchive, bool upwardDylib) { + if (ctx.logInputFiles()) + diag << path << "\n"; + + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = ctx.getMemoryBuffer(path); + if (std::error_code ec = mbOrErr.getError()) + return makeErrorFile(path, ec); + ErrorOr<std::unique_ptr<File>> fileOrErr = + ctx.registry().loadFile(std::move(mbOrErr.get())); + if (std::error_code ec = fileOrErr.getError()) + return makeErrorFile(path, ec); + std::unique_ptr<File> &file = fileOrErr.get(); + + // If file is a dylib, inform LinkingContext about it. 
+ if (SharedLibraryFile *shl = dyn_cast<SharedLibraryFile>(file.get())) { + if (std::error_code ec = shl->parse()) + return makeErrorFile(path, ec); + ctx.registerDylib(reinterpret_cast<mach_o::MachODylibFile *>(shl), + upwardDylib); + } + if (wholeArchive) + return parseMemberFiles(std::move(file)); + std::vector<std::unique_ptr<File>> files; + files.push_back(std::move(file)); + return files; +} + +} // end anonymous namespace + +// Test may be running on Windows. Canonicalize the path +// separator to '/' to get consistent outputs for tests. +static std::string canonicalizePath(StringRef path) { + char sep = llvm::sys::path::get_separator().front(); + if (sep != '/') { + std::string fixedPath = path; + std::replace(fixedPath.begin(), fixedPath.end(), sep, '/'); + return fixedPath; + } else { + return path; + } +} + +static void addFile(StringRef path, MachOLinkingContext &ctx, + bool loadWholeArchive, + bool upwardDylib, raw_ostream &diag) { + std::vector<std::unique_ptr<File>> files = + loadFile(ctx, path, diag, loadWholeArchive, upwardDylib); + for (std::unique_ptr<File> &file : files) + ctx.getNodes().push_back(llvm::make_unique<FileNode>(std::move(file))); +} + +// Export lists are one symbol per line. Blank lines are ignored. +// Trailing comments start with #. +static std::error_code parseExportsList(StringRef exportFilePath, + MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Map in export list file. + ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(exportFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(exportFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair<StringRef, StringRef> lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + // Ignore trailing # comments. 
+ std::pair<StringRef, StringRef> symAndComment = line.split('#'); + StringRef sym = symAndComment.first.trim(); + if (!sym.empty()) + ctx.addExportSymbol(sym); + buffer = lineAndRest.second; + } + return std::error_code(); +} + +/// Order files are one symbol per line. Blank lines are ignored. +/// Trailing comments start with #. Symbol names can be prefixed with an +/// architecture name and/or .o leaf name. Examples: +/// _foo +/// bar.o:_bar +/// libfrob.a(bar.o):_bar +/// x86_64:_foo64 +static std::error_code parseOrderFile(StringRef orderFilePath, + MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Map in order file. + ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(orderFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(orderFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair<StringRef, StringRef> lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + buffer = lineAndRest.second; + // Ignore trailing # comments. + std::pair<StringRef, StringRef> symAndComment = line.split('#'); + if (symAndComment.first.empty()) + continue; + StringRef sym = symAndComment.first.trim(); + if (sym.empty()) + continue; + // Check for prefix. + StringRef prefix; + std::pair<StringRef, StringRef> prefixAndSym = sym.split(':'); + if (!prefixAndSym.second.empty()) { + sym = prefixAndSym.second; + prefix = prefixAndSym.first; + if (!prefix.endswith(".o") && !prefix.endswith(".o)")) { + // If arch name prefix does not match arch being linked, ignore symbol. 
+ if (!ctx.archName().equals(prefix)) + continue; + prefix = ""; + } + } else + sym = prefixAndSym.first; + if (!sym.empty()) { + ctx.appendOrderedSymbol(sym, prefix); + //llvm::errs() << sym << ", prefix=" << prefix << "\n"; + } + } + return std::error_code(); +} + +// +// There are two variants of the -filelist option: +// +// -filelist <path> +// In this variant, the path is to a text file which contains one file path +// per line. There are no comments or trimming of whitespace. +// +// -fileList <path>,<dir> +// In this variant, the path is to a text file which contains a partial path +// per line. The <dir> prefix is prepended to each partial path. +// +static llvm::Error loadFileList(StringRef fileListPath, + MachOLinkingContext &ctx, bool forceLoad, + raw_ostream &diagnostics) { + // If there is a comma, split off <dir>. + std::pair<StringRef, StringRef> opt = fileListPath.split(','); + StringRef filePath = opt.first; + StringRef dirName = opt.second; + ctx.addInputFileDependency(filePath); + // Map in file list file. + ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(filePath); + if (std::error_code ec = mb.getError()) + return llvm::errorCodeToError(ec); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair<StringRef, StringRef> lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + StringRef path; + if (!dirName.empty()) { + // If there is a <dir> then prepend dir to each line. + SmallString<256> fullPath; + fullPath.assign(dirName); + llvm::sys::path::append(fullPath, Twine(line)); + path = ctx.copy(fullPath.str()); + } else { + // No <dir> use whole line as input file path. 
+ path = ctx.copy(line); + } + if (!ctx.pathExists(path)) { + return llvm::make_error<GenericError>(Twine("File not found '") + + path + + "'"); + } + if (ctx.testingFileUsage()) { + diagnostics << "Found filelist entry " << canonicalizePath(path) << '\n'; + } + addFile(path, ctx, forceLoad, false, diagnostics); + buffer = lineAndRest.second; + } + return llvm::Error::success(); +} + +/// Parse number assuming it is base 16, but allow 0x prefix. +static bool parseNumberBase16(StringRef numStr, uint64_t &baseAddress) { + if (numStr.startswith_lower("0x")) + numStr = numStr.drop_front(2); + return numStr.getAsInteger(16, baseAddress); +} + +static void parseLLVMOptions(const LinkingContext &ctx) { + // Honor -mllvm + if (!ctx.llvmOptions().empty()) { + unsigned numArgs = ctx.llvmOptions().size(); + auto **args = new const char *[numArgs + 2]; + args[0] = "lld (LLVM option parsing)"; + for (unsigned i = 0; i != numArgs; ++i) + args[i + 1] = ctx.llvmOptions()[i]; + args[numArgs + 1] = nullptr; + llvm::cl::ParseCommandLineOptions(numArgs + 1, args); + } +} + +namespace lld { +namespace mach_o { + +bool parse(llvm::ArrayRef<const char *> args, MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Parse command line options using DarwinLdOptions.td + DarwinLdOptTable table; + unsigned missingIndex; + unsigned missingCount; + llvm::opt::InputArgList parsedArgs = + table.ParseArgs(args.slice(1), missingIndex, missingCount); + if (missingCount) { + diagnostics << "error: missing arg value for '" + << parsedArgs.getArgString(missingIndex) << "' expected " + << missingCount << " argument(s).\n"; + return false; + } + + for (auto unknownArg : parsedArgs.filtered(OPT_UNKNOWN)) { + diagnostics << "warning: ignoring unknown argument: " + << unknownArg->getAsString(parsedArgs) << "\n"; + } + + // Figure out output kind ( -dylib, -r, -bundle, -preload, or -static ) + llvm::MachO::HeaderFileType fileType = llvm::MachO::MH_EXECUTE; + bool isStaticExecutable = false; + if 
(llvm::opt::Arg *kind = parsedArgs.getLastArg( + OPT_dylib, OPT_relocatable, OPT_bundle, OPT_static, OPT_preload)) { + switch (kind->getOption().getID()) { + case OPT_dylib: + fileType = llvm::MachO::MH_DYLIB; + break; + case OPT_relocatable: + fileType = llvm::MachO::MH_OBJECT; + break; + case OPT_bundle: + fileType = llvm::MachO::MH_BUNDLE; + break; + case OPT_static: + fileType = llvm::MachO::MH_EXECUTE; + isStaticExecutable = true; + break; + case OPT_preload: + fileType = llvm::MachO::MH_PRELOAD; + break; + } + } + + // Handle -arch xxx + MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; + if (llvm::opt::Arg *archStr = parsedArgs.getLastArg(OPT_arch)) { + arch = MachOLinkingContext::archFromName(archStr->getValue()); + if (arch == MachOLinkingContext::arch_unknown) { + diagnostics << "error: unknown arch named '" << archStr->getValue() + << "'\n"; + return false; + } + } + // If no -arch specified, scan input files to find first non-fat .o file. + if (arch == MachOLinkingContext::arch_unknown) { + for (auto &inFile : parsedArgs.filtered(OPT_INPUT)) { + // This is expensive because it opens and maps the file. But that is + // ok because no -arch is rare. + if (MachOLinkingContext::isThinObjectFile(inFile->getValue(), arch)) + break; + } + if (arch == MachOLinkingContext::arch_unknown && + !parsedArgs.getLastArg(OPT_test_file_usage)) { + // If no -arch and no options at all, print usage message. 
+ if (parsedArgs.size() == 0) + table.PrintHelp(llvm::outs(), args[0], "LLVM Linker", false); + else + diagnostics << "error: -arch not specified and could not be inferred\n"; + return false; + } + } + + // Handle -macosx_version_min or -ios_version_min + MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown; + uint32_t minOSVersion = 0; + if (llvm::opt::Arg *minOS = + parsedArgs.getLastArg(OPT_macosx_version_min, OPT_ios_version_min, + OPT_ios_simulator_version_min)) { + switch (minOS->getOption().getID()) { + case OPT_macosx_version_min: + os = MachOLinkingContext::OS::macOSX; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed macosx_version_min value\n"; + return false; + } + break; + case OPT_ios_version_min: + os = MachOLinkingContext::OS::iOS; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed ios_version_min value\n"; + return false; + } + break; + case OPT_ios_simulator_version_min: + os = MachOLinkingContext::OS::iOS_simulator; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed ios_simulator_version_min value\n"; + return false; + } + break; + } + } else { + // No min-os version on command line, check environment variables + } + + // Handle export_dynamic + // FIXME: Should we warn when this applies to something other than a static + // executable or dylib? Those are the only cases where this has an effect. + // Note, this has to come before ctx.configure() so that we get the correct + // value for _globalsAreDeadStripRoots. + bool exportDynamicSymbols = parsedArgs.hasArg(OPT_export_dynamic); + + // Now that there's enough information parsed in, let the linking context + // set up default values. 
+ ctx.configure(fileType, arch, os, minOSVersion, exportDynamicSymbols); + + // Handle -e xxx + if (llvm::opt::Arg *entry = parsedArgs.getLastArg(OPT_entry)) + ctx.setEntrySymbolName(entry->getValue()); + + // Handle -o xxx + if (llvm::opt::Arg *outpath = parsedArgs.getLastArg(OPT_output)) + ctx.setOutputPath(outpath->getValue()); + else + ctx.setOutputPath("a.out"); + + // Handle -image_base XXX and -seg1addr XXXX + if (llvm::opt::Arg *imageBase = parsedArgs.getLastArg(OPT_image_base)) { + uint64_t baseAddress; + if (parseNumberBase16(imageBase->getValue(), baseAddress)) { + diagnostics << "error: image_base expects a hex number\n"; + return false; + } else if (baseAddress < ctx.pageZeroSize()) { + diagnostics << "error: image_base overlaps with __PAGEZERO\n"; + return false; + } else if (baseAddress % ctx.pageSize()) { + diagnostics << "error: image_base must be a multiple of page size (" + << "0x" << llvm::utohexstr(ctx.pageSize()) << ")\n"; + return false; + } + + ctx.setBaseAddress(baseAddress); + } + + // Handle -dead_strip + if (parsedArgs.getLastArg(OPT_dead_strip)) + ctx.setDeadStripping(true); + + bool globalWholeArchive = false; + // Handle -all_load + if (parsedArgs.getLastArg(OPT_all_load)) + globalWholeArchive = true; + + // Handle -install_name + if (llvm::opt::Arg *installName = parsedArgs.getLastArg(OPT_install_name)) + ctx.setInstallName(installName->getValue()); + else + ctx.setInstallName(ctx.outputPath()); + + // Handle -mark_dead_strippable_dylib + if (parsedArgs.getLastArg(OPT_mark_dead_strippable_dylib)) + ctx.setDeadStrippableDylib(true); + + // Handle -compatibility_version and -current_version + if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_compatibility_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics + << "error: -compatibility_version can only be used with -dylib\n"; + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + 
diagnostics << "error: -compatibility_version value is malformed\n"; + return false; + } + ctx.setCompatibilityVersion(parsedVers); + } + + if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_current_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics << "-current_version can only be used with -dylib\n"; + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + diagnostics << "error: -current_version value is malformed\n"; + return false; + } + ctx.setCurrentVersion(parsedVers); + } + + // Handle -bundle_loader + if (llvm::opt::Arg *loader = parsedArgs.getLastArg(OPT_bundle_loader)) + ctx.setBundleLoader(loader->getValue()); + + // Handle -sectalign segname sectname align + for (auto &alignArg : parsedArgs.filtered(OPT_sectalign)) { + const char* segName = alignArg->getValue(0); + const char* sectName = alignArg->getValue(1); + const char* alignStr = alignArg->getValue(2); + if ((alignStr[0] == '0') && (alignStr[1] == 'x')) + alignStr += 2; + unsigned long long alignValue; + if (llvm::getAsUnsignedInteger(alignStr, 16, alignValue)) { + diagnostics << "error: -sectalign alignment value '" + << alignStr << "' not a valid number\n"; + return false; + } + uint16_t align = 1 << llvm::countTrailingZeros(alignValue); + if (!llvm::isPowerOf2_64(alignValue)) { + diagnostics << "warning: alignment for '-sectalign " + << segName << " " << sectName + << llvm::format(" 0x%llX", alignValue) + << "' is not a power of two, using " + << llvm::format("0x%08X", align) << "\n"; + } + ctx.addSectionAlignment(segName, sectName, align); + } + + // Handle -mllvm + for (auto &llvmArg : parsedArgs.filtered(OPT_mllvm)) { + ctx.appendLLVMOption(llvmArg->getValue()); + } + + // Handle -print_atoms + if (parsedArgs.getLastArg(OPT_print_atoms)) + ctx.setPrintAtoms(); + + // Handle -t (trace) option. + if (parsedArgs.getLastArg(OPT_t)) + ctx.setLogInputFiles(true); + + // Handle -demangle option. 
+ if (parsedArgs.getLastArg(OPT_demangle)) + ctx.setDemangleSymbols(true); + + // Handle -keep_private_externs + if (parsedArgs.getLastArg(OPT_keep_private_externs)) { + ctx.setKeepPrivateExterns(true); + if (ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + diagnostics << "warning: -keep_private_externs only used in -r mode\n"; + } + + // Handle -dependency_info <path> used by Xcode. + if (llvm::opt::Arg *depInfo = parsedArgs.getLastArg(OPT_dependency_info)) { + if (std::error_code ec = ctx.createDependencyFile(depInfo->getValue())) { + diagnostics << "warning: " << ec.message() + << ", processing '-dependency_info " + << depInfo->getValue() + << "'\n"; + } + } + + // In -test_file_usage mode, we'll be given an explicit list of paths that + // exist. We'll also be expected to print out information about how we located + // libraries and so on that the user specified, but not to actually do any + // linking. + if (parsedArgs.getLastArg(OPT_test_file_usage)) { + ctx.setTestingFileUsage(); + + // With paths existing by fiat, linking is not going to end well. + ctx.setDoNothing(true); + + // Only bother looking for an existence override if we're going to use it. + for (auto existingPath : parsedArgs.filtered(OPT_path_exists)) { + ctx.addExistingPathForDebug(existingPath->getValue()); + } + } + + // Register possible input file parsers. + if (!ctx.doNothing()) { + ctx.registry().addSupportMachOObjects(ctx); + ctx.registry().addSupportArchives(ctx.logInputFiles()); + ctx.registry().addSupportYamlFiles(); + } + + // Now construct the set of library search directories, following ld64's + // baroque set of accumulated hacks. Mostly, the algorithm constructs + // { syslibroots } x { libpaths } + // + // Unfortunately, there are numerous exceptions: + // 1. Only absolute paths get modified by syslibroot options. + // 2. If there is just 1 -syslibroot, system paths not found in it are + // skipped. + // 3. If the last -syslibroot is "/", all of them are ignored entirely. 
+ // 4. If { syslibroots } x path == {}, the original path is kept. + std::vector<StringRef> sysLibRoots; + for (auto syslibRoot : parsedArgs.filtered(OPT_syslibroot)) { + sysLibRoots.push_back(syslibRoot->getValue()); + } + if (!sysLibRoots.empty()) { + // Ignore all if last -syslibroot is "/". + if (sysLibRoots.back() != "/") + ctx.setSysLibRoots(sysLibRoots); + } + + // Paths specified with -L come first, and are not considered system paths for + // the case where there is precisely 1 -syslibroot. + for (auto libPath : parsedArgs.filtered(OPT_L)) { + ctx.addModifiedSearchDir(libPath->getValue()); + } + + // Process -F directories (where to look for frameworks). + for (auto fwPath : parsedArgs.filtered(OPT_F)) { + ctx.addFrameworkSearchDir(fwPath->getValue()); + } + + // -Z suppresses the standard search paths. + if (!parsedArgs.hasArg(OPT_Z)) { + ctx.addModifiedSearchDir("/usr/lib", true); + ctx.addModifiedSearchDir("/usr/local/lib", true); + ctx.addFrameworkSearchDir("/Library/Frameworks", true); + ctx.addFrameworkSearchDir("/System/Library/Frameworks", true); + } + + // Now that we've constructed the final set of search paths, print out those + // search paths in verbose mode. 
+ if (parsedArgs.getLastArg(OPT_v)) { + diagnostics << "Library search paths:\n"; + for (auto path : ctx.searchDirs()) { + diagnostics << " " << path << '\n'; + } + diagnostics << "Framework search paths:\n"; + for (auto path : ctx.frameworkDirs()) { + diagnostics << " " << path << '\n'; + } + } + + // Handle -exported_symbols_list <file> + for (auto expFile : parsedArgs.filtered(OPT_exported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) { + diagnostics << "error: -exported_symbols_list cannot be combined " + << "with -unexported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-exported_symbols_list " + << expFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -exported_symbol <symbol> + for (auto symbol : parsedArgs.filtered(OPT_exported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) { + diagnostics << "error: -exported_symbol cannot be combined " + << "with -unexported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList); + ctx.addExportSymbol(symbol->getValue()); + } + + // Handle -unexported_symbols_list <file> + for (auto expFile : parsedArgs.filtered(OPT_unexported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) { + diagnostics << "error: -unexported_symbols_list cannot be combined " + << "with -exported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::blackList); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-unexported_symbols_list " + << expFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -unexported_symbol 
<symbol> + for (auto symbol : parsedArgs.filtered(OPT_unexported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) { + diagnostics << "error: -unexported_symbol cannot be combined " + << "with -exported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::blackList); + ctx.addExportSymbol(symbol->getValue()); + } + + // Handle obosolete -multi_module and -single_module + if (llvm::opt::Arg *mod = + parsedArgs.getLastArg(OPT_multi_module, OPT_single_module)) { + if (mod->getOption().getID() == OPT_multi_module) { + diagnostics << "warning: -multi_module is obsolete and being ignored\n"; + } + else { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics << "warning: -single_module being ignored. " + "It is only for use when producing a dylib\n"; + } + } + } + + // Handle obsolete ObjC options: -objc_gc_compaction, -objc_gc, -objc_gc_only + if (parsedArgs.getLastArg(OPT_objc_gc_compaction)) { + diagnostics << "error: -objc_gc_compaction is not supported\n"; + return false; + } + + if (parsedArgs.getLastArg(OPT_objc_gc)) { + diagnostics << "error: -objc_gc is not supported\n"; + return false; + } + + if (parsedArgs.getLastArg(OPT_objc_gc_only)) { + diagnostics << "error: -objc_gc_only is not supported\n"; + return false; + } + + // Handle -pie or -no_pie + if (llvm::opt::Arg *pie = parsedArgs.getLastArg(OPT_pie, OPT_no_pie)) { + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + switch (ctx.os()) { + case MachOLinkingContext::OS::macOSX: + if ((minOSVersion < 0x000A0500) && + (pie->getOption().getID() == OPT_pie)) { + diagnostics << "-pie can only be used when targeting " + "Mac OS X 10.5 or later\n"; + return false; + } + break; + case MachOLinkingContext::OS::iOS: + if ((minOSVersion < 0x00040200) && + (pie->getOption().getID() == OPT_pie)) { + diagnostics << "-pie can only be used when targeting " + "iOS 4.2 or later\n"; + return false; + } + break; + case 
MachOLinkingContext::OS::iOS_simulator: + if (pie->getOption().getID() == OPT_no_pie) { + diagnostics << "iOS simulator programs must be built PIE\n"; + return false; + } + break; + case MachOLinkingContext::OS::unknown: + break; + } + ctx.setPIE(pie->getOption().getID() == OPT_pie); + break; + case llvm::MachO::MH_PRELOAD: + break; + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + diagnostics << "warning: " << pie->getSpelling() << " being ignored. " + << "It is only used when linking main executables\n"; + break; + default: + diagnostics << pie->getSpelling() + << " can only used when linking main executables\n"; + return false; + } + } + + // Handle -version_load_command or -no_version_load_command + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_version_load_command, + OPT_no_version_load_command)) { + flagOn = arg->getOption().getID() == OPT_version_load_command; + flagOff = arg->getOption().getID() == OPT_no_version_load_command; + } + + // default to adding version load command for dynamic code, + // static code must opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + ctx.setGenerateVersionLoadCommand(false); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static exectuables only generate it if required. 
+ if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateVersionLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateVersionLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateVersionLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateVersionLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. + break; + } + } + + // Handle -function_starts or -no_function_starts + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_function_starts, + OPT_no_function_starts)) { + flagOn = arg->getOption().getID() == OPT_function_starts; + flagOff = arg->getOption().getID() == OPT_no_function_starts; + } + + // default to adding functions start for dynamic code, static code must + // opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + ctx.setGenerateFunctionStartsLoadCommand(false); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static exectuables only generate it if required. 
+ if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateFunctionStartsLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateFunctionStartsLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateFunctionStartsLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateFunctionStartsLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. + break; + } + } + + // Handle -data_in_code_info or -no_data_in_code_info + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_data_in_code_info, + OPT_no_data_in_code_info)) { + flagOn = arg->getOption().getID() == OPT_data_in_code_info; + flagOff = arg->getOption().getID() == OPT_no_data_in_code_info; + } + + // default to adding data in code for dynamic code, static code must + // opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static exectuables only generate it if required. 
+ if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateDataInCodeLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. + break; + } + } + + // Handle sdk_version + if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_sdk_version)) { + uint32_t sdkVersion = 0; + if (MachOLinkingContext::parsePackedVersion(arg->getValue(), + sdkVersion)) { + diagnostics << "error: malformed sdkVersion value\n"; + return false; + } + ctx.setSdkVersion(sdkVersion); + } else if (ctx.generateVersionLoadCommand()) { + // If we don't have an sdk version, but were going to emit a load command + // with min_version, then we need to give an warning as we have no sdk + // version to put in that command. + // FIXME: We need to decide whether to make this an error. + diagnostics << "warning: -sdk_version is required when emitting " + "min version load command. 
" + "Setting sdk version to match provided min version\n"; + ctx.setSdkVersion(ctx.osMinVersion()); + } + + // Handle source_version + if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_source_version)) { + uint64_t version = 0; + if (MachOLinkingContext::parsePackedVersion(arg->getValue(), + version)) { + diagnostics << "error: malformed source_version value\n"; + return false; + } + ctx.setSourceVersion(version); + } + + // Handle stack_size + if (llvm::opt::Arg *stackSize = parsedArgs.getLastArg(OPT_stack_size)) { + uint64_t stackSizeVal; + if (parseNumberBase16(stackSize->getValue(), stackSizeVal)) { + diagnostics << "error: stack_size expects a hex number\n"; + return false; + } + if ((stackSizeVal % ctx.pageSize()) != 0) { + diagnostics << "error: stack_size must be a multiple of page size (" + << "0x" << llvm::utohexstr(ctx.pageSize()) << ")\n"; + return false; + } + + ctx.setStackSize(stackSizeVal); + } + + // Handle debug info handling options: -S + if (parsedArgs.hasArg(OPT_S)) + ctx.setDebugInfoMode(MachOLinkingContext::DebugInfoMode::noDebugMap); + + // Handle -order_file <file> + for (auto orderFile : parsedArgs.filtered(OPT_order_file)) { + if (std::error_code ec = parseOrderFile(orderFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-order_file " + << orderFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -flat_namespace. 
+ if (llvm::opt::Arg *ns = + parsedArgs.getLastArg(OPT_flat_namespace, OPT_twolevel_namespace)) { + if (ns->getOption().getID() == OPT_flat_namespace) + ctx.setUseFlatNamespace(true); + } + + // Handle -undefined + if (llvm::opt::Arg *undef = parsedArgs.getLastArg(OPT_undefined)) { + MachOLinkingContext::UndefinedMode UndefMode; + if (StringRef(undef->getValue()).equals("error")) + UndefMode = MachOLinkingContext::UndefinedMode::error; + else if (StringRef(undef->getValue()).equals("warning")) + UndefMode = MachOLinkingContext::UndefinedMode::warning; + else if (StringRef(undef->getValue()).equals("suppress")) + UndefMode = MachOLinkingContext::UndefinedMode::suppress; + else if (StringRef(undef->getValue()).equals("dynamic_lookup")) + UndefMode = MachOLinkingContext::UndefinedMode::dynamicLookup; + else { + diagnostics << "error: invalid option to -undefined " + "[ warning | error | suppress | dynamic_lookup ]\n"; + return false; + } + + if (ctx.useFlatNamespace()) { + // If we're using -flat_namespace then 'warning', 'suppress' and + // 'dynamic_lookup' are all equivalent, so map them to 'suppress'. + if (UndefMode != MachOLinkingContext::UndefinedMode::error) + UndefMode = MachOLinkingContext::UndefinedMode::suppress; + } else { + // If we're using -twolevel_namespace then 'warning' and 'suppress' are + // illegal. Emit a diagnostic if they've been (mis)used. + if (UndefMode == MachOLinkingContext::UndefinedMode::warning || + UndefMode == MachOLinkingContext::UndefinedMode::suppress) { + diagnostics << "error: can't use -undefined warning or suppress with " + "-twolevel_namespace\n"; + return false; + } + } + + ctx.setUndefinedMode(UndefMode); + } + + // Handle -no_objc_category_merging. 
+ if (parsedArgs.getLastArg(OPT_no_objc_category_merging)) + ctx.setMergeObjCCategories(false); + + // Handle -rpath <path> + if (parsedArgs.hasArg(OPT_rpath)) { + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!ctx.minOS("10.5", "2.0")) { + if (ctx.os() == MachOLinkingContext::OS::macOSX) { + diagnostics << "error: -rpath can only be used when targeting " + "OS X 10.5 or later\n"; + } else { + diagnostics << "error: -rpath can only be used when targeting " + "iOS 2.0 or later\n"; + } + return false; + } + break; + default: + diagnostics << "error: -rpath can only be used when creating " + "a dynamic final linked image\n"; + return false; + } + + for (auto rPath : parsedArgs.filtered(OPT_rpath)) { + ctx.addRpath(rPath->getValue()); + } + } + + // Parse the LLVM options before we process files in case the file handling + // makes use of things like DEBUG(). + parseLLVMOptions(ctx); + + // Handle input files and sectcreate. + for (auto &arg : parsedArgs) { + bool upward; + llvm::Optional<StringRef> resolvedPath; + switch (arg->getOption().getID()) { + default: + continue; + case OPT_INPUT: + addFile(arg->getValue(), ctx, globalWholeArchive, false, diagnostics); + break; + case OPT_upward_library: + addFile(arg->getValue(), ctx, false, true, diagnostics); + break; + case OPT_force_load: + addFile(arg->getValue(), ctx, true, false, diagnostics); + break; + case OPT_l: + case OPT_upward_l: + upward = (arg->getOption().getID() == OPT_upward_l); + resolvedPath = ctx.searchLibrary(arg->getValue()); + if (!resolvedPath) { + diagnostics << "Unable to find library for " << arg->getSpelling() + << arg->getValue() << "\n"; + return false; + } else if (ctx.testingFileUsage()) { + diagnostics << "Found " << (upward ? 
"upward " : " ") << "library " + << canonicalizePath(resolvedPath.getValue()) << '\n'; + } + addFile(resolvedPath.getValue(), ctx, globalWholeArchive, + upward, diagnostics); + break; + case OPT_framework: + case OPT_upward_framework: + upward = (arg->getOption().getID() == OPT_upward_framework); + resolvedPath = ctx.findPathForFramework(arg->getValue()); + if (!resolvedPath) { + diagnostics << "Unable to find framework for " + << arg->getSpelling() << " " << arg->getValue() << "\n"; + return false; + } else if (ctx.testingFileUsage()) { + diagnostics << "Found " << (upward ? "upward " : " ") << "framework " + << canonicalizePath(resolvedPath.getValue()) << '\n'; + } + addFile(resolvedPath.getValue(), ctx, globalWholeArchive, + upward, diagnostics); + break; + case OPT_filelist: + if (auto ec = loadFileList(arg->getValue(), + ctx, globalWholeArchive, + diagnostics)) { + handleAllErrors(std::move(ec), [&](const llvm::ErrorInfoBase &EI) { + diagnostics << "error: "; + EI.log(diagnostics); + diagnostics << ", processing '-filelist " << arg->getValue() << "'\n"; + }); + return false; + } + break; + case OPT_sectcreate: { + const char* seg = arg->getValue(0); + const char* sect = arg->getValue(1); + const char* fileName = arg->getValue(2); + + ErrorOr<std::unique_ptr<MemoryBuffer>> contentOrErr = + MemoryBuffer::getFile(fileName); + + if (!contentOrErr) { + diagnostics << "error: can't open -sectcreate file " << fileName << "\n"; + return false; + } + + ctx.addSectCreateSection(seg, sect, std::move(*contentOrErr)); + } + break; + } + } + + if (ctx.getNodes().empty()) { + diagnostics << "No input files\n"; + return false; + } + + // Validate the combination of options used. 
+ return ctx.validate(diagnostics); +} + +/* Asks the linking context for extra files (its implicit files when Implicit + * is true, its internal files otherwise) and inserts them at the front of + * ctx.getNodes(). Walking the list backwards while always inserting at + * begin() leaves the new files in the order the context produced them. */ +static void createFiles(MachOLinkingContext &ctx, bool Implicit) { + std::vector<std::unique_ptr<File>> Files; + if (Implicit) + ctx.createImplicitFiles(Files); + else + ctx.createInternalFiles(Files); + for (auto i = Files.rbegin(), e = Files.rend(); i != e; ++i) { + auto &members = ctx.getNodes(); + members.insert(members.begin(), llvm::make_unique<FileNode>(std::move(*i))); + } +} + +/// This is where the link is actually performed. +/// +/// Parses \p args into a MachOLinkingContext, parses every input file, adds +/// the context's internal and implicit files, resolves, runs the passes and +/// writes the output file. Pass and write failures are described on +/// \p diagnostics. +/// +/// \returns true on success, or when parse() left the context in do-nothing +/// mode; false as soon as any step fails. +bool link(llvm::ArrayRef<const char *> args, raw_ostream &diagnostics) { + MachOLinkingContext ctx; + if (!parse(args, ctx, diagnostics)) + return false; + if (ctx.doNothing()) + return true; + // parse() already fails on an empty node list; this is a defensive re-check. + if (ctx.getNodes().empty()) + return false; + + // NOTE(review): nothing is checked after parse() here; presumably parse + // failures surface during resolve - confirm. + for (std::unique_ptr<Node> &ie : ctx.getNodes()) + if (FileNode *node = dyn_cast<FileNode>(ie.get())) + node->getFile()->parse(); + + // Add the context's internal files first. + createFiles(ctx, false /* Implicit */); + + // Give target a chance to add files + createFiles(ctx, true /* Implicit */); + + // Give target a chance to postprocess input files. + // Mach-O uses this chance to move all object files before library files. + ctx.finalizeInputFiles(); + + // Do core linking. + ScopedTask resolveTask(getDefaultDomain(), "Resolve"); + Resolver resolver(ctx); + if (!resolver.resolve()) + return false; + SimpleFile *merged = nullptr; + { + // Keep a raw pointer for the pass and write steps; ownership of the + // merged file moves into the node list held by ctx. + std::unique_ptr<SimpleFile> mergedFile = resolver.resultFile(); + merged = mergedFile.get(); + auto &members = ctx.getNodes(); + members.insert(members.begin(), + llvm::make_unique<FileNode>(std::move(mergedFile))); + } + resolveTask.end(); + + // Run passes on linked atoms. + ScopedTask passTask(getDefaultDomain(), "Passes"); + PassManager pm; + ctx.addPasses(pm); + if (auto ec = pm.runOnFile(*merged)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. 
+ diagnostics << "Failed to run passes on file '" << ctx.outputPath() + << "': "; + logAllUnhandledErrors(std::move(ec), diagnostics, std::string()); + return false; + } + + passTask.end(); + + // Give linked atoms to Writer to generate output file. + ScopedTask writeTask(getDefaultDomain(), "Write"); + if (auto ec = ctx.writeFile(*merged)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + diagnostics << "Failed to write file '" << ctx.outputPath() << "': "; + logAllUnhandledErrors(std::move(ec), diagnostics, std::string()); + return false; + } + + return true; +} + +} // end namespace mach_o +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/Driver/DarwinLdOptions.td b/contrib/llvm/tools/lld/lib/Driver/DarwinLdOptions.td new file mode 100644 index 000000000000..fa07f33646e7 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Driver/DarwinLdOptions.td @@ -0,0 +1,242 @@ +include "llvm/Option/OptParser.td" + + +// output kinds +def grp_kind : OptionGroup<"outs">, HelpText<"OUTPUT KIND">; +def relocatable : Flag<["-"], "r">, + HelpText<"Create relocatable object file">, Group<grp_kind>; +def static : Flag<["-"], "static">, + HelpText<"Create static executable">, Group<grp_kind>; +def dynamic : Flag<["-"], "dynamic">, + HelpText<"Create dynamic executable (default)">,Group<grp_kind>; +def dylib : Flag<["-"], "dylib">, + HelpText<"Create dynamic library">, Group<grp_kind>; +def bundle : Flag<["-"], "bundle">, + HelpText<"Create dynamic bundle">, Group<grp_kind>; +def execute : Flag<["-"], "execute">, + HelpText<"Create main executable (default)">, Group<grp_kind>; +def preload : Flag<["-"], "preload">, + HelpText<"Create binary for use with embedded systems">, Group<grp_kind>; + +// optimizations +def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">; +def dead_strip : Flag<["-"], "dead_strip">, + HelpText<"Remove unreference code and data">, Group<grp_opts>; +def macosx_version_min 
: Separate<["-"], "macosx_version_min">, + MetaVarName<"<version>">, + HelpText<"Minimum Mac OS X version">, Group<grp_opts>; +def ios_version_min : Separate<["-"], "ios_version_min">, + MetaVarName<"<version>">, + HelpText<"Minimum iOS version">, Group<grp_opts>; +def iphoneos_version_min : Separate<["-"], "iphoneos_version_min">, + Alias<ios_version_min>; +def ios_simulator_version_min : Separate<["-"], "ios_simulator_version_min">, + MetaVarName<"<version>">, + HelpText<"Minimum iOS simulator version">, Group<grp_opts>; +def sdk_version : Separate<["-"], "sdk_version">, + MetaVarName<"<version>">, + HelpText<"SDK version">, Group<grp_opts>; +def source_version : Separate<["-"], "source_version">, + MetaVarName<"<version>">, + HelpText<"Source version">, Group<grp_opts>; +def version_load_command : Flag<["-"], "version_load_command">, + HelpText<"Force generation of a version load command">, Group<grp_opts>; +def no_version_load_command : Flag<["-"], "no_version_load_command">, + HelpText<"Disable generation of a version load command">, Group<grp_opts>; +def function_starts : Flag<["-"], "function_starts">, + HelpText<"Force generation of a function starts load command">, + Group<grp_opts>; +def no_function_starts : Flag<["-"], "no_function_starts">, + HelpText<"Disable generation of a function starts load command">, + Group<grp_opts>; +def data_in_code_info : Flag<["-"], "data_in_code_info">, + HelpText<"Force generation of a data in code load command">, + Group<grp_opts>; +def no_data_in_code_info : Flag<["-"], "no_data_in_code_info">, + HelpText<"Disable generation of a data in code load command">, + Group<grp_opts>; +def mllvm : Separate<["-"], "mllvm">, + MetaVarName<"<option>">, + HelpText<"Options to pass to LLVM during LTO">, Group<grp_opts>; +def exported_symbols_list : Separate<["-"], "exported_symbols_list">, + MetaVarName<"<file-path>">, + HelpText<"Restricts which symbols will be exported">, Group<grp_opts>; +def exported_symbol : Separate<["-"], 
"exported_symbol">, + MetaVarName<"<symbol>">, + HelpText<"Restricts which symbols will be exported">, Group<grp_opts>; +def unexported_symbols_list : Separate<["-"], "unexported_symbols_list">, + MetaVarName<"<file-path>">, + HelpText<"Lists symbols that should not be exported">, Group<grp_opts>; +def unexported_symbol : Separate<["-"], "unexported_symbol">, + MetaVarName<"<symbol>">, + HelpText<"A symbol which should not be exported">, Group<grp_opts>; +def keep_private_externs : Flag<["-"], "keep_private_externs">, + HelpText<"Private extern (hidden) symbols should not be transformed " + "into local symbols">, Group<grp_opts>; +def order_file : Separate<["-"], "order_file">, + MetaVarName<"<file-path>">, + HelpText<"re-order and move specified symbols to start of their section">, + Group<grp_opts>; +def flat_namespace : Flag<["-"], "flat_namespace">, + HelpText<"Resolves symbols in any (transitively) linked dynamic libraries. " + "Source libraries are not recorded: dyld will re-search all " + "images at runtime and use the first definition found.">, + Group<grp_opts>; +def twolevel_namespace : Flag<["-"], "twolevel_namespace">, + HelpText<"Resolves symbols in listed libraries only. 
Source libraries are " + "recorded in the symbol table.">, + Group<grp_opts>; +def undefined : Separate<["-"], "undefined">, + MetaVarName<"<undefined>">, + HelpText<"Determines how undefined symbols are handled.">, + Group<grp_opts>; +def no_objc_category_merging : Flag<["-"], "no_objc_category_merging">, + HelpText<"Disables the optimisation which merges Objective-C categories " + "on a class in to the class itself.">, + Group<grp_opts>; + +// main executable options +def grp_main : OptionGroup<"opts">, HelpText<"MAIN EXECUTABLE OPTIONS">; +def entry : Separate<["-"], "e">, + MetaVarName<"<entry-name>">, + HelpText<"entry symbol name">,Group<grp_main>; +def pie : Flag<["-"], "pie">, + HelpText<"Create Position Independent Executable (for ASLR)">, + Group<grp_main>; +def no_pie : Flag<["-"], "no_pie">, + HelpText<"Do not create Position Independent Executable">, + Group<grp_main>; +def stack_size : Separate<["-"], "stack_size">, + HelpText<"Specifies the maximum stack size for the main thread in a program. " + "Must be a page-size multiple. 
(default=8Mb)">, + Group<grp_main>; +def export_dynamic : Flag<["-"], "export_dynamic">, + HelpText<"Preserves all global symbols in main executables during LTO">, + Group<grp_main>; + +// dylib executable options +def grp_dylib : OptionGroup<"opts">, HelpText<"DYLIB EXECUTABLE OPTIONS">; +def install_name : Separate<["-"], "install_name">, + MetaVarName<"<path>">, + HelpText<"The dylib's install name">, Group<grp_dylib>; +def mark_dead_strippable_dylib : Flag<["-"], "mark_dead_strippable_dylib">, + HelpText<"Marks the dylib as having no side effects during initialization">, + Group<grp_dylib>; +def compatibility_version : Separate<["-"], "compatibility_version">, + MetaVarName<"<version>">, + HelpText<"The dylib's compatibility version">, Group<grp_dylib>; +def current_version : Separate<["-"], "current_version">, + MetaVarName<"<version>">, + HelpText<"The dylib's current version">, Group<grp_dylib>; + +// dylib executable options - compatibility aliases +def dylib_install_name : Separate<["-"], "dylib_install_name">, + Alias<install_name>; +def dylib_compatibility_version : Separate<["-"], "dylib_compatibility_version">, + MetaVarName<"<version>">, Alias<compatibility_version>; +def dylib_current_version : Separate<["-"], "dylib_current_version">, + MetaVarName<"<version>">, Alias<current_version>; + +// bundle executable options +def grp_bundle : OptionGroup<"opts">, HelpText<"BUNDLE EXECUTABLE OPTIONS">; +def bundle_loader : Separate<["-"], "bundle_loader">, + MetaVarName<"<path>">, + HelpText<"The executable that will be loading this Mach-O bundle">, + Group<grp_bundle>; + +// library options +def grp_libs : OptionGroup<"libs">, HelpText<"LIBRARY OPTIONS">; +def L : JoinedOrSeparate<["-"], "L">, + MetaVarName<"<dir>">, + HelpText<"Add directory to library search path">, Group<grp_libs>; +def F : JoinedOrSeparate<["-"], "F">, + MetaVarName<"<dir>">, + HelpText<"Add directory to framework search path">, Group<grp_libs>; +def Z : Flag<["-"], "Z">, + HelpText<"Do 
not search standard directories for libraries or frameworks">; +def all_load : Flag<["-"], "all_load">, + HelpText<"Forces all members of all static libraries to be loaded">, + Group<grp_libs>; +def force_load : Separate<["-"], "force_load">, + MetaVarName<"<library-path>">, + HelpText<"Forces all members of specified static libraries to be loaded">, + Group<grp_libs>; +def syslibroot : Separate<["-"], "syslibroot">, MetaVarName<"<dir>">, + HelpText<"Add path to SDK to all absolute library search paths">, + Group<grp_libs>; + +// Input options +def l : Joined<["-"], "l">, + MetaVarName<"<libname>">, + HelpText<"Base name of library searched for in -L directories">; +def upward_l : Joined<["-"], "upward-l">, + MetaVarName<"<libname>">, + HelpText<"Base name of upward library searched for in -L directories">; +def framework : Separate<["-"], "framework">, + MetaVarName<"<name>">, + HelpText<"Base name of framework searched for in -F directories">; +def upward_framework : Separate<["-"], "upward_framework">, + MetaVarName<"<name>">, + HelpText<"Base name of upward framework searched for in -F directories">; +def upward_library : Separate<["-"], "upward_library">, + MetaVarName<"<path>">, + HelpText<"path to upward dylib to link with">; +def filelist : Separate<["-"], "filelist">, + MetaVarName<"<path>">, + HelpText<"file containing paths to input files">; + + +// test case options +// -test_file_usage and -path_exists are consumed together by the driver: +// -path_exists is only read when -test_file_usage is given. +def print_atoms : Flag<["-"], "print_atoms">, + HelpText<"Emit output as yaml atoms">; +def test_file_usage : Flag<["-"], "test_file_usage">, + HelpText<"Only files specified by -path_exists are considered to exist. 
" + "Print which files would be used">; +def path_exists : Separate<["-"], "path_exists">, + MetaVarName<"<path>">, + HelpText<"Used with -test_file_usage to declare a path">; + + +// general options +def output : Separate<["-"], "o">, + MetaVarName<"<path>">, + HelpText<"Output file path">; +def arch : Separate<["-"], "arch">, + MetaVarName<"<arch-name>">, + HelpText<"Architecture to link">; +def sectalign : MultiArg<["-"], "sectalign", 3>, + MetaVarName<"<segname> <sectname> <alignment>">, + HelpText<"Alignment for segment/section">; +def sectcreate : MultiArg<["-"], "sectcreate", 3>, + MetaVarName<"<segname> <sectname> <file>">, + HelpText<"Create section <segname>/<sectname> from contents of <file>">; +def image_base : Separate<["-"], "image_base">; +def seg1addr : Separate<["-"], "seg1addr">, Alias<image_base>; +def demangle : Flag<["-"], "demangle">, + HelpText<"Demangles symbol names in errors and warnings">; +def dependency_info : Separate<["-"], "dependency_info">, + MetaVarName<"<file>">, + HelpText<"Write binary list of files used during link">; +def S : Flag<["-"], "S">, + HelpText<"Remove debug information (STABS or DWARF) from the output file">; +def rpath : Separate<["-"], "rpath">, + MetaVarName<"<path>">, + HelpText<"Add path to the runpath search path list for image being created">; + +def t : Flag<["-"], "t">, + HelpText<"Print the names of the input files as ld processes them">; +def v : Flag<["-"], "v">, + HelpText<"Print linker information">; + +// Obsolete options +def grp_obsolete : OptionGroup<"obsolete">, HelpText<"OBSOLETE OPTIONS">; +def single_module : Flag<["-"], "single_module">, + HelpText<"Default for dylibs">, Group<grp_obsolete>; +def multi_module : Flag<["-"], "multi_module">, + HelpText<"Unsupported way to build dylibs">, Group<grp_obsolete>; +def objc_gc_compaction : Flag<["-"], "objc_gc_compaction">, + HelpText<"Unsupported ObjC GC option">, Group<grp_obsolete>; +def objc_gc : Flag<["-"], "objc_gc">, + HelpText<"Unsupported 
# Descend into the MachO and YAML subdirectories before declaring the
# library below.
add_subdirectory(MachO)
add_subdirectory(YAML)

if (MSVC)
  add_definitions(-wd4062) # Suppress 'warning C4062: Enumerator has no associated handler in a switch statement.'
endif()

# lldReaderWriter is built from FileArchive.cpp alone.  It uses the LLVM
# Object and Support components and links against lldCore and lldYAML.
add_lld_library(lldReaderWriter
  FileArchive.cpp

  ADDITIONAL_HEADER_DIRS
  ${LLD_INCLUDE_DIR}/lld/ReaderWriter

  LINK_COMPONENTS
    Object
    Support

  LINK_LIBS
    lldCore
    lldYAML
  )
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reader.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> +#include <set> +#include <string> +#include <system_error> +#include <unordered_map> +#include <utility> +#include <vector> + +using llvm::object::Archive; + +namespace lld { + +namespace { + +/// \brief The FileArchive class represents an Archive Library file +class FileArchive : public lld::ArchiveLibraryFile { +public: + FileArchive(std::unique_ptr<MemoryBuffer> mb, const Registry ®, + StringRef path, bool logLoading) + : ArchiveLibraryFile(path), _mb(std::shared_ptr<MemoryBuffer>(mb.release())), + _registry(reg), _logLoading(logLoading) {} + + /// \brief Check if any member of the archive contains an Atom with the + /// specified name and return the File object for that member, or nullptr. + File *find(StringRef name) override { + auto member = _symbolMemberMap.find(name); + if (member == _symbolMemberMap.end()) + return nullptr; + Archive::Child c = member->second; + + // Don't return a member already returned + Expected<StringRef> buf = c.getBuffer(); + if (!buf) { + // TODO: Actually report errors helpfully. 
+ consumeError(buf.takeError()); + return nullptr; + } + const char *memberStart = buf->data(); + if (_membersInstantiated.count(memberStart)) + return nullptr; + _membersInstantiated.insert(memberStart); + + std::unique_ptr<File> result; + if (instantiateMember(c, result)) + return nullptr; + + File *file = result.get(); + _filesReturned.push_back(std::move(result)); + + // Give up the file pointer. It was stored and will be destroyed with destruction of FileArchive + return file; + } + + /// \brief parse each member + std::error_code + parseAllMembers(std::vector<std::unique_ptr<File>> &result) override { + if (std::error_code ec = parse()) + return ec; + llvm::Error err = llvm::Error::success(); + for (auto mf = _archive->child_begin(err), me = _archive->child_end(); + mf != me; ++mf) { + std::unique_ptr<File> file; + if (std::error_code ec = instantiateMember(*mf, file)) { + // err is Success (or we wouldn't be in the loop body) but we can't + // return without testing or consuming it. + consumeError(std::move(err)); + return ec; + } + result.push_back(std::move(file)); + } + if (err) + return errorToErrorCode(std::move(err)); + return std::error_code(); + } + + const AtomRange<DefinedAtom> defined() const override { + return _noDefinedAtoms; + } + + const AtomRange<UndefinedAtom> undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _noDefinedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + +protected: + std::error_code doParse() override { + // Make Archive object which will be owned by FileArchive object. 
+ llvm::Error Err = llvm::Error::success(); + _archive.reset(new Archive(_mb->getMemBufferRef(), Err)); + if (Err) + return errorToErrorCode(std::move(Err)); + std::error_code ec; + if ((ec = buildTableOfContents())) + return ec; + return std::error_code(); + } + +private: + std::error_code instantiateMember(Archive::Child member, + std::unique_ptr<File> &result) const { + Expected<llvm::MemoryBufferRef> mbOrErr = member.getMemoryBufferRef(); + if (!mbOrErr) + return errorToErrorCode(mbOrErr.takeError()); + llvm::MemoryBufferRef mb = mbOrErr.get(); + std::string memberPath = (_archive->getFileName() + "(" + + mb.getBufferIdentifier() + ")").str(); + + if (_logLoading) + llvm::errs() << memberPath << "\n"; + + std::unique_ptr<MemoryBuffer> memberMB(MemoryBuffer::getMemBuffer( + mb.getBuffer(), mb.getBufferIdentifier(), false)); + + ErrorOr<std::unique_ptr<File>> fileOrErr = + _registry.loadFile(std::move(memberMB)); + if (std::error_code ec = fileOrErr.getError()) + return ec; + result = std::move(fileOrErr.get()); + if (std::error_code ec = result->parse()) + return ec; + result->setArchivePath(_archive->getFileName()); + + // The memory buffer is co-owned by the archive file and the children, + // so that the bufffer is deallocated when all the members are destructed. 
+ result->setSharedMemoryBuffer(_mb); + return std::error_code(); + } + + std::error_code buildTableOfContents() { + DEBUG_WITH_TYPE("FileArchive", llvm::dbgs() + << "Table of contents for archive '" + << _archive->getFileName() << "':\n"); + for (const Archive::Symbol &sym : _archive->symbols()) { + StringRef name = sym.getName(); + Expected<Archive::Child> memberOrErr = sym.getMember(); + if (!memberOrErr) + return errorToErrorCode(memberOrErr.takeError()); + Archive::Child member = memberOrErr.get(); + DEBUG_WITH_TYPE("FileArchive", + llvm::dbgs() + << llvm::format("0x%08llX ", + member.getBuffer()->data()) + << "'" << name << "'\n"); + _symbolMemberMap.insert(std::make_pair(name, member)); + } + return std::error_code(); + } + + typedef std::unordered_map<StringRef, Archive::Child> MemberMap; + typedef std::set<const char *> InstantiatedSet; + + std::shared_ptr<MemoryBuffer> _mb; + const Registry &_registry; + std::unique_ptr<Archive> _archive; + MemberMap _symbolMemberMap; + InstantiatedSet _membersInstantiated; + bool _logLoading; + std::vector<std::unique_ptr<MemoryBuffer>> _memberBuffers; + std::vector<std::unique_ptr<File>> _filesReturned; +}; + +class ArchiveReader : public Reader { +public: + ArchiveReader(bool logLoading) : _logLoading(logLoading) {} + + bool canParse(file_magic magic, MemoryBufferRef) const override { + return magic == llvm::sys::fs::file_magic::archive; + } + + ErrorOr<std::unique_ptr<File>> loadFile(std::unique_ptr<MemoryBuffer> mb, + const Registry ®) const override { + StringRef path = mb->getBufferIdentifier(); + std::unique_ptr<File> ret = + llvm::make_unique<FileArchive>(std::move(mb), reg, path, _logLoading); + return std::move(ret); + } + +private: + bool _logLoading; +}; + +} // anonymous namespace + +void Registry::addSupportArchives(bool logLoading) { + add(std::unique_ptr<Reader>(new ArchiveReader(logLoading))); +} + +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.cpp 
b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.cpp new file mode 100644 index 000000000000..cb20907b3e30 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.cpp @@ -0,0 +1,172 @@ +//===- lib/FileFormat/MachO/ArchHandler.cpp -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + + +ArchHandler::ArchHandler() { +} + +ArchHandler::~ArchHandler() { +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create( + MachOLinkingContext::Arch arch) { + switch (arch) { + case MachOLinkingContext::arch_x86_64: + return create_x86_64(); + case MachOLinkingContext::arch_x86: + return create_x86(); + case MachOLinkingContext::arch_armv6: + case MachOLinkingContext::arch_armv7: + case MachOLinkingContext::arch_armv7s: + return create_arm(); + case MachOLinkingContext::arch_arm64: + return create_arm64(); + default: + llvm_unreachable("Unknown arch"); + } +} + + +bool ArchHandler::isLazyPointer(const Reference &ref) { + // A lazy bind entry is needed for a lazy pointer. 
+ const StubInfo &info = stubInfo(); + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + if (ref.kindArch() != info.lazyPointerReferenceToFinal.arch) + return false; + return (ref.kindValue() == info.lazyPointerReferenceToFinal.kind); +} + + +ArchHandler::RelocPattern ArchHandler::relocPattern(const Relocation &reloc) { + assert((reloc.type & 0xFFF0) == 0); + uint16_t result = reloc.type; + if (reloc.scattered) + result |= rScattered; + if (reloc.pcRel) + result |= rPcRel; + if (reloc.isExtern) + result |= rExtern; + switch(reloc.length) { + case 0: + break; + case 1: + result |= rLength2; + break; + case 2: + result |= rLength4; + break; + case 3: + result |= rLength8; + break; + default: + llvm_unreachable("bad r_length"); + } + return result; +} + +normalized::Relocation +ArchHandler::relocFromPattern(ArchHandler::RelocPattern pattern) { + normalized::Relocation result; + result.offset = 0; + result.scattered = (pattern & rScattered); + result.type = (RelocationInfoType)(pattern & 0xF); + result.pcRel = (pattern & rPcRel); + result.isExtern = (pattern & rExtern); + result.value = 0; + result.symbol = 0; + switch (pattern & 0x300) { + case rLength1: + result.length = 0; + break; + case rLength2: + result.length = 1; + break; + case rLength4: + result.length = 2; + break; + case rLength8: + result.length = 3; + break; + } + return result; +} + +void ArchHandler::appendReloc(normalized::Relocations &relocs, uint32_t offset, + uint32_t symbol, uint32_t value, + RelocPattern pattern) { + normalized::Relocation reloc = relocFromPattern(pattern); + reloc.offset = offset; + reloc.symbol = symbol; + reloc.value = value; + relocs.push_back(reloc); +} + + +int16_t ArchHandler::readS16(const uint8_t *addr, bool isBig) { + return read16(addr, isBig); +} + +int32_t ArchHandler::readS32(const uint8_t *addr, bool isBig) { + return read32(addr, isBig); +} + +uint32_t ArchHandler::readU32(const uint8_t *addr, bool isBig) { + return read32(addr, 
isBig); +} + + int64_t ArchHandler::readS64(const uint8_t *addr, bool isBig) { + return read64(addr, isBig); +} + +bool ArchHandler::isDwarfCIE(bool isBig, const DefinedAtom *atom) { + assert(atom->contentType() == DefinedAtom::typeCFI); + if (atom->rawContent().size() < sizeof(uint32_t)) + return false; + uint32_t size = read32(atom->rawContent().data(), isBig); + + uint32_t idOffset = sizeof(uint32_t); + if (size == 0xffffffffU) + idOffset += sizeof(uint64_t); + + return read32(atom->rawContent().data() + idOffset, isBig) == 0; +} + +const Atom *ArchHandler::fdeTargetFunction(const DefinedAtom *fde) { + for (auto ref : *fde) { + if (ref->kindNamespace() == Reference::KindNamespace::mach_o && + ref->kindValue() == unwindRefToFunctionKind()) { + assert(ref->kindArch() == kindArch() && "unexpected Reference arch"); + return ref->target(); + } + } + + return nullptr; +} + +} // namespace mach_o +} // namespace lld + + + diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.h new file mode 100644 index 000000000000..70a63bd1004b --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.h @@ -0,0 +1,319 @@ +//===- lib/FileFormat/MachO/ArchHandler.h ---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
///
/// The ArchHandler class handles all architecture specific aspects of
/// mach-o linking.
///
class ArchHandler {
public:
  virtual ~ArchHandler();

  /// There is no public interface to subclasses of ArchHandler, so this
  /// is the only way to instantiate an ArchHandler.
  static std::unique_ptr<ArchHandler> create(MachOLinkingContext::Arch arch);

  /// Get (arch specific) kind strings used by Registry.
  virtual const Registry::KindStrings *kindStrings() = 0;

  /// Convert mach-o Arch to Reference::KindArch.
  virtual Reference::KindArch kindArch() = 0;

  /// Used by StubPass to update References to shared library functions
  /// to be references to a stub.
  virtual bool isCallSite(const Reference &) = 0;

  /// Used by GOTPass to locate GOT References.  The default implementation
  /// returns false and leaves \p canBypassGOT untouched.
  virtual bool isGOTAccess(const Reference &, bool &canBypassGOT) {
    return false;
  }

  /// Used by TLVPass to locate TLV References.  Defaults to false.
  virtual bool isTLVAccess(const Reference &) const { return false; }

  /// Used by the TLVPass to update TLV References.  Defaults to a no-op.
  virtual void updateReferenceToTLV(const Reference *) {}

  /// Used by ShimPass to insert shims in branches that switch mode.
  virtual bool isNonCallBranch(const Reference &) = 0;

  /// Used by GOTPass to update GOT References.  Defaults to a no-op.
  virtual void updateReferenceToGOT(const Reference *, bool targetIsNowGOT) {}

  /// Does this architecture make use of __unwind_info sections for exception
  /// handling? If so, it will need a separate pass to create them.
  virtual bool needsCompactUnwind() = 0;

  /// Returns the kind of reference to use to synthesize a 32-bit image-offset
  /// value, used in the __unwind_info section.
  virtual Reference::KindValue imageOffsetKind() = 0;

  /// Returns the kind of reference to use to synthesize a 32-bit image-offset
  /// indirect value. Used for personality functions in the __unwind_info
  /// section.
  virtual Reference::KindValue imageOffsetKindIndirect() = 0;

  /// Architecture specific compact unwind type that signals __eh_frame should
  /// actually be used.
  virtual uint32_t dwarfCompactUnwindType() = 0;

  /// Reference from an __eh_frame CIE atom to the personality function it's
  /// describing. Usually pointer-sized and PC-relative, but differs in whether
  /// it needs to be in relocatable objects.
  virtual Reference::KindValue unwindRefToPersonalityFunctionKind() = 0;

  /// Reference from an __eh_frame FDE to the CIE it's based on.
  virtual Reference::KindValue unwindRefToCIEKind() = 0;

  /// Reference from an __eh_frame FDE atom to the function it's
  /// describing. Usually pointer-sized and PC-relative, but differs in whether
  /// it needs to be in relocatable objects.
  virtual Reference::KindValue unwindRefToFunctionKind() = 0;

  /// Reference from an __unwind_info entry of dwarfCompactUnwindType to the
  /// required __eh_frame entry. On current architectures, the low 24 bits
  /// represent the offset of the function's FDE entry from the start of
  /// __eh_frame.
  virtual Reference::KindValue unwindRefToEhFrameKind() = 0;

  /// Returns a pointer sized reference kind. On 64-bit targets this will
  /// likely be something like pointer64, and pointer32 on 32-bit targets.
  virtual Reference::KindValue pointerKind() = 0;

  /// Returns the function atom an __eh_frame FDE atom describes: the target
  /// of its unwindRefToFunctionKind() reference, or nullptr if it has none.
  virtual const Atom *fdeTargetFunction(const DefinedAtom *fde);

  /// Used by normalizedFromAtoms() to know where to generate rebasing and
  /// binding info in final executables.
  virtual bool isPointer(const Reference &) = 0;

  /// Used by normalizedFromAtoms() to know where to generate lazy binding
  /// info in final executables.  The default implementation matches the
  /// reference against stubInfo().lazyPointerReferenceToFinal.
  virtual bool isLazyPointer(const Reference &);

  /// Returns true if the specified relocation is paired to the next relocation.
  virtual bool isPairedReloc(const normalized::Relocation &) = 0;

  /// Prototype for a helper function.  Given a sectionIndex and address,
  /// finds the atom containing that address and the offset within that atom.
  typedef std::function<llvm::Error (uint32_t sectionIndex, uint64_t addr,
                        const lld::Atom **, Reference::Addend *)>
                        FindAtomBySectionAndAddress;

  /// Prototype for a helper function.  Given a symbolIndex, finds the atom
  /// representing that symbol.
  typedef std::function<llvm::Error (uint32_t symbolIndex,
                        const lld::Atom **)> FindAtomBySymbolIndex;

  /// Analyzes a relocation from a .o file and returns the info
  /// (kind, target, addend) needed to instantiate a Reference.
  /// Two helper functions are passed as parameters to find the target atom
  /// given a symbol index or address.
  virtual llvm::Error
          getReferenceInfo(const normalized::Relocation &reloc,
                           const DefinedAtom *inAtom,
                           uint32_t offsetInAtom,
                           uint64_t fixupAddress, bool isBigEndian,
                           FindAtomBySectionAndAddress atomFromAddress,
                           FindAtomBySymbolIndex atomFromSymbolIndex,
                           Reference::KindValue *kind,
                           const lld::Atom **target,
                           Reference::Addend *addend) = 0;

  /// Analyzes a pair of relocations from a .o file and returns the info
  /// (kind, target, addend) needed to instantiate a Reference.
  /// Two helper functions are passed as parameters to find the target atom
  /// given a symbol index or address.
  virtual llvm::Error
      getPairReferenceInfo(const normalized::Relocation &reloc1,
                           const normalized::Relocation &reloc2,
                           const DefinedAtom *inAtom,
                           uint32_t offsetInAtom,
                           uint64_t fixupAddress, bool isBig, bool scatterable,
                           FindAtomBySectionAndAddress atomFromAddress,
                           FindAtomBySymbolIndex atomFromSymbolIndex,
                           Reference::KindValue *kind,
                           const lld::Atom **target,
                           Reference::Addend *addend) = 0;

  /// Prototype for a helper function.  Given an atom, finds the symbol table
  /// index for it in the output file.
  typedef std::function<uint32_t (const Atom &atom)> FindSymbolIndexForAtom;

  /// Prototype for a helper function.  Given an atom, finds the index
  /// of the section that will contain the atom.
  typedef std::function<uint32_t (const Atom &atom)> FindSectionIndexForAtom;

  /// Prototype for a helper function.  Given an atom, finds the address
  /// assigned to it in the output file.
  typedef std::function<uint64_t (const Atom &atom)> FindAddressForAtom;

  /// Some architectures require local symbols on anonymous atoms.
  /// Defaults to false.
  virtual bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) {
    return false;
  }

  /// Copy raw content then apply all fixup References on an Atom.
  virtual void generateAtomContent(const DefinedAtom &atom, bool relocatable,
                                   FindAddressForAtom findAddress,
                                   FindAddressForAtom findSectionAddress,
                                   uint64_t imageBaseAddress,
                          llvm::MutableArrayRef<uint8_t> atomContentBuffer) = 0;

  /// Used in -r mode to convert a Reference to a mach-o relocation.
  virtual void appendSectionRelocations(const DefinedAtom &atom,
                                        uint64_t atomSectionOffset,
                                        const Reference &ref,
                                        FindSymbolIndexForAtom,
                                        FindSectionIndexForAtom,
                                        FindAddressForAtom,
                                        normalized::Relocations&) = 0;

  /// Add arch-specific References.  Defaults to a no-op.
  virtual void addAdditionalReferences(MachODefinedAtom &atom) { }

  /// Add Reference for data-in-code marker.  Defaults to a no-op.
  virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff,
                                      uint16_t length, uint16_t kind) { }

  /// Returns true if the specified Reference value marks the start or end
  /// of a data-in-code range in an atom.  Defaults to false.
  virtual bool isDataInCodeTransition(Reference::KindValue refKind) {
    return false;
  }

  /// Returns the Reference value for a Reference that marks the start of
  /// a data-in-code range.  Defaults to 0.
  virtual Reference::KindValue dataInCodeTransitionStart(
                                                const MachODefinedAtom &atom) {
    return 0;
  }

  /// Returns the Reference value for a Reference that marks the end of
  /// a data-in-code range.  Defaults to 0.
  virtual Reference::KindValue dataInCodeTransitionEnd(
                                                const MachODefinedAtom &atom) {
    return 0;
  }

  /// Only relevant for 32-bit arm archs.  Defaults to false.
  virtual bool isThumbFunction(const DefinedAtom &atom) { return false; }

  /// Only relevant for 32-bit arm archs.  The default implementation must
  /// never be reached.
  virtual const DefinedAtom *createShim(MachOFile &file, bool thumbToArm,
                                        const DefinedAtom &) {
    llvm_unreachable("shims only support on arm");
  }

  /// Does a given unwind-cfi atom represent a CIE (as opposed to an FDE).
  static bool isDwarfCIE(bool isBig, const DefinedAtom *atom);

  /// Describes one Reference to create: its arch, kind, offset and addend.
  struct ReferenceInfo {
    Reference::KindArch arch;
    uint16_t            kind;
    uint32_t            offset;
    int32_t             addend;
  };

  /// Like ReferenceInfo but without an arch and with a 'used' flag.
  /// NOTE(review): presumably 'used' tells the consumer whether to emit the
  /// reference at all -- confirm against the stub-generation code.
  struct OptionalRefInfo {
    bool                used;
    uint16_t            kind;
    uint32_t            offset;
    int32_t             addend;
  };

  /// Table of architecture specific information for creating stubs.
  struct StubInfo {
    const char*     binderSymbolName;
    ReferenceInfo   lazyPointerReferenceToHelper;
    ReferenceInfo   lazyPointerReferenceToFinal;
    ReferenceInfo   nonLazyPointerReferenceToBinder;
    uint8_t         codeAlignment;

    uint32_t        stubSize;
    uint8_t         stubBytes[16];
    ReferenceInfo   stubReferenceToLP;
    OptionalRefInfo optStubReferenceToLP;

    uint32_t        stubHelperSize;
    uint8_t         stubHelperBytes[16];
    ReferenceInfo   stubHelperReferenceToImm;
    ReferenceInfo   stubHelperReferenceToHelperCommon;

    DefinedAtom::ContentType stubHelperImageCacheContentType;

    uint32_t        stubHelperCommonSize;
    uint8_t         stubHelperCommonAlignment;
    uint8_t         stubHelperCommonBytes[36];
    ReferenceInfo   stubHelperCommonReferenceToCache;
    OptionalRefInfo optStubHelperCommonReferenceToCache;
    ReferenceInfo   stubHelperCommonReferenceToBinder;
    OptionalRefInfo optStubHelperCommonReferenceToBinder;
  };

  /// Returns the stub description table for this architecture.
  virtual const StubInfo &stubInfo() = 0;

protected:
  ArchHandler();

  // Per-architecture factories used by create().
  static std::unique_ptr<mach_o::ArchHandler> create_x86_64();
  static std::unique_ptr<mach_o::ArchHandler> create_x86();
  static std::unique_ptr<mach_o::ArchHandler> create_arm();
  static std::unique_ptr<mach_o::ArchHandler> create_arm64();

  // Handy way to pack mach-o r_type and other bit fields into one 16-bit value.
  // The low four bits hold r_type; the enumerators below are OR'ed on top.
  typedef uint16_t RelocPattern;
  enum {
    rScattered = 0x8000,
    rPcRel     = 0x4000,
    rExtern    = 0x2000,
    rLength1   = 0x0000,
    rLength2   = 0x0100,
    rLength4   = 0x0200,
    rLength8   = 0x0300,
    // Aliases that reuse the length encodings under arm/thumb lo/hi names.
    rLenArmLo  = rLength1,
    rLenArmHi  = rLength2,
    rLenThmbLo = rLength4,
    rLenThmbHi = rLength8
  };
  /// Extract RelocPattern from normalized mach-o relocation.
  static RelocPattern relocPattern(const normalized::Relocation &reloc);
  /// Create normalized Relocation initialized from pattern.
  static normalized::Relocation relocFromPattern(RelocPattern pattern);
  /// One liner to add a relocation.
  static void appendReloc(normalized::Relocations &relocs, uint32_t offset,
                          uint32_t symbol, uint32_t value,
                          RelocPattern pattern);


  // Read a signed/unsigned integer of the named width at addr; isBig selects
  // big-endian byte order.
  static int16_t  readS16(const uint8_t *addr, bool isBig);
  static int32_t  readS32(const uint8_t *addr, bool isBig);
  static uint32_t readU32(const uint8_t *addr, bool isBig);
  static int64_t  readS64(const uint8_t *addr, bool isBig);
};
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::little32_t; + + +class ArchHandler_arm : public ArchHandler { +public: + ArchHandler_arm() = default; + ~ArchHandler_arm() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { return Reference::KindArch::ARM; } + + const ArchHandler::StubInfo &stubInfo() override; + bool isCallSite(const Reference &) override; + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + bool isNonCallBranch(const Reference &) override; + + bool needsCompactUnwind() override { + return false; + } + Reference::KindValue imageOffsetKind() override { + return invalid; + } + Reference::KindValue imageOffsetKindIndirect() override { + return invalid; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return invalid; + } + + Reference::KindValue unwindRefToCIEKind() override { + return invalid; + } + + Reference::KindValue unwindRefToFunctionKind() override { + return invalid; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return invalid; + } + + Reference::KindValue pointerKind() override { + return invalid; + } + + uint32_t dwarfCompactUnwindType() override { + // FIXME + return -1; + } + + llvm::Error getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress 
atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom, + FindSectionIndexForAtom, + FindAddressForAtom, + normalized::Relocations &) override; + + void addAdditionalReferences(MachODefinedAtom &atom) override; + + bool isDataInCodeTransition(Reference::KindValue refKind) override { + switch (refKind) { + case modeThumbCode: + case modeArmCode: + case modeData: + return true; + default: + return false; + break; + } + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return atom.isThumb() ? 
modeThumbCode : modeArmCode; + } + + bool isThumbFunction(const DefinedAtom &atom) override; + const DefinedAtom *createShim(MachOFile &file, bool thumbToArm, + const DefinedAtom &) override; + +private: + friend class Thumb2ToArmShimAtom; + friend class ArmToThumbShimAtom; + + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfoArmPIC; + + enum ArmKind : Reference::KindValue { + invalid, /// for error condition + + modeThumbCode, /// Content starting at this offset is thumb. + modeArmCode, /// Content starting at this offset is arm. + modeData, /// Content starting at this offset is data. + + // Kinds found in mach-o .o files: + thumb_bl22, /// ex: bl _foo + thumb_b22, /// ex: b _foo + thumb_movw, /// ex: movw r1, :lower16:_foo + thumb_movt, /// ex: movt r1, :lower16:_foo + thumb_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) + thumb_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) + arm_bl24, /// ex: bl _foo + arm_b24, /// ex: b _foo + arm_movw, /// ex: movw r1, :lower16:_foo + arm_movt, /// ex: movt r1, :lower16:_foo + arm_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) + arm_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) + pointer32, /// ex: .long _foo + delta32, /// ex: .long _foo - . + + // Kinds introduced by Passes: + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + }; + + // Utility functions for inspecting/updating instructions. 
+ static bool isThumbMovw(uint32_t instruction); + static bool isThumbMovt(uint32_t instruction); + static bool isArmMovw(uint32_t instruction); + static bool isArmMovt(uint32_t instruction); + static int32_t getDisplacementFromThumbBranch(uint32_t instruction, uint32_t); + static int32_t getDisplacementFromArmBranch(uint32_t instruction); + static uint16_t getWordFromThumbMov(uint32_t instruction); + static uint16_t getWordFromArmMov(uint32_t instruction); + static uint32_t clearThumbBit(uint32_t value, const Atom *target); + static uint32_t setDisplacementInArmBranch(uint32_t instr, int32_t disp, + bool targetIsThumb); + static uint32_t setDisplacementInThumbBranch(uint32_t instr, uint32_t ia, + int32_t disp, bool targetThumb); + static uint32_t setWordFromThumbMov(uint32_t instruction, uint16_t word); + static uint32_t setWordFromArmMov(uint32_t instruction, uint16_t word); + + StringRef stubName(const DefinedAtom &); + bool useExternalRelocationTo(const Atom &target); + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, bool &thumbMode, + bool targetIsThumb); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, bool &thumbMode, + bool targetIsThumb); +}; + +//===----------------------------------------------------------------------===// +// ArchHandler_arm +//===----------------------------------------------------------------------===// + +const Registry::KindStrings ArchHandler_arm::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(modeThumbCode), + LLD_KIND_STRING_ENTRY(modeArmCode), + LLD_KIND_STRING_ENTRY(modeData), + LLD_KIND_STRING_ENTRY(thumb_bl22), + LLD_KIND_STRING_ENTRY(thumb_b22), + LLD_KIND_STRING_ENTRY(thumb_movw), + LLD_KIND_STRING_ENTRY(thumb_movt), + LLD_KIND_STRING_ENTRY(thumb_movw_funcRel), + LLD_KIND_STRING_ENTRY(thumb_movt_funcRel), 
+ LLD_KIND_STRING_ENTRY(arm_bl24), + LLD_KIND_STRING_ENTRY(arm_b24), + LLD_KIND_STRING_ENTRY(arm_movw), + LLD_KIND_STRING_ENTRY(arm_movt), + LLD_KIND_STRING_ENTRY(arm_movw_funcRel), + LLD_KIND_STRING_ENTRY(arm_movt_funcRel), + LLD_KIND_STRING_ENTRY(pointer32), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_arm::_sStubInfoArmPIC = { + "dyld_stub_binder", + + // References in lazy pointer + { Reference::KindArch::ARM, pointer32, 0, 0 }, + { Reference::KindArch::ARM, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::ARM, pointer32, 0, 0 }, + + // arm code alignment 2^2 + 2, + + // Stub size and code + 16, + { 0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 12 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00 }, // .long L_foo$lazy_ptr - (L1$scv + 8) + { Reference::KindArch::ARM, delta32, 12, 0 }, + { false, 0, 0, 0 }, + + // Stub Helper size and code + 12, + { 0x00, 0xC0, 0x9F, 0xE5, // ldr ip, [pc, #0] + 0x00, 0x00, 0x00, 0xEA, // b _helperhelper + 0x00, 0x00, 0x00, 0x00 }, // .long lazy-info-offset + { Reference::KindArch::ARM, lazyImmediateLocation, 8, 0 }, + { Reference::KindArch::ARM, arm_b24, 4, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeGOT, + + // Stub Helper-Common size and code + 36, + // Stub helper alignment + 2, + { // push lazy-info-offset + 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! + // push address of dyld_mageLoaderCache + 0x10, 0xC0, 0x9F, 0xE5, // ldr ip, L1 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! 
+ // jump through dyld_stub_binder + 0x08, 0xC0, 0x9F, 0xE5, // ldr ip, L2 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00, // L1: .long fFastStubGOTAtom - (helper+16) + 0x00, 0x00, 0x00, 0x00 }, // L2: .long dyld_stub_binder - (helper+28) + { Reference::KindArch::ARM, delta32, 28, 0xC }, + { false, 0, 0, 0 }, + { Reference::KindArch::ARM, delta32, 32, 0x04 }, + { false, 0, 0, 0 } +}; + +const ArchHandler::StubInfo &ArchHandler_arm::stubInfo() { + // If multiple kinds of stubs are supported, select which StubInfo here. + return _sStubInfoArmPIC; +} + +bool ArchHandler_arm::isCallSite(const Reference &ref) { + switch (ref.kindValue()) { + case thumb_b22: + case thumb_bl22: + case arm_b24: + case arm_bl24: + return true; + default: + return false; + } +} + +bool ArchHandler_arm::isPointer(const Reference &ref) { + return (ref.kindValue() == pointer32); +} + +bool ArchHandler_arm::isNonCallBranch(const Reference &ref) { + switch (ref.kindValue()) { + case thumb_b22: + case arm_b24: + return true; + default: + return false; + } +} + +bool ArchHandler_arm::isPairedReloc(const Relocation &reloc) { + switch (reloc.type) { + case ARM_RELOC_SECTDIFF: + case ARM_RELOC_LOCAL_SECTDIFF: + case ARM_RELOC_HALF_SECTDIFF: + case ARM_RELOC_HALF: + return true; + default: + return false; + } +} + +/// Trace references from stub atom to lazy pointer to target and get its name. +StringRef ArchHandler_arm::stubName(const DefinedAtom &stubAtom) { + assert(stubAtom.contentType() == DefinedAtom::typeStub); + for (const Reference *ref : stubAtom) { + if (const DefinedAtom* lp = dyn_cast<DefinedAtom>(ref->target())) { + if (lp->contentType() != DefinedAtom::typeLazyPointer) + continue; + for (const Reference *ref2 : *lp) { + if (ref2->kindValue() != lazyPointer) + continue; + return ref2->target()->name(); + } + } + } + return "stub"; +} + +/// Extract displacement from an ARM b/bl/blx instruction. 
+int32_t ArchHandler_arm::getDisplacementFromArmBranch(uint32_t instruction) { + // Sign-extend imm24 + int32_t displacement = (instruction & 0x00FFFFFF) << 2; + if ((displacement & 0x02000000) != 0) + displacement |= 0xFC000000; + // If this is BLX and H bit set, add 2. + if ((instruction & 0xFF000000) == 0xFB000000) + displacement += 2; + return displacement; +} + +/// Update an ARM b/bl/blx instruction, switching bl <-> blx as needed. +uint32_t ArchHandler_arm::setDisplacementInArmBranch(uint32_t instruction, + int32_t displacement, + bool targetIsThumb) { + assert((displacement <= 33554428) && (displacement > (-33554432)) + && "arm branch out of range"); + bool is_blx = ((instruction & 0xF0000000) == 0xF0000000); + uint32_t newInstruction = (instruction & 0xFF000000); + uint32_t h = 0; + if (targetIsThumb) { + // Force use of BLX. + newInstruction = 0xFA000000; + if (!is_blx) { + assert(((instruction & 0xF0000000) == 0xE0000000) + && "no conditional arm blx"); + assert(((instruction & 0xFF000000) == 0xEB000000) + && "no arm pc-rel BX instruction"); + } + if (displacement & 2) + h = 1; + } + else { + // Force use of B/BL. + if (is_blx) + newInstruction = 0xEB000000; + } + newInstruction |= (h << 24) | ((displacement >> 2) & 0x00FFFFFF); + return newInstruction; +} + +/// Extract displacement from a thumb b/bl/blx instruction. +int32_t ArchHandler_arm::getDisplacementFromThumbBranch(uint32_t instruction, + uint32_t instrAddr) { + bool is_blx = ((instruction & 0xD000F800) == 0xC000F000); + uint32_t s = (instruction >> 10) & 0x1; + uint32_t j1 = (instruction >> 29) & 0x1; + uint32_t j2 = (instruction >> 27) & 0x1; + uint32_t imm10 = instruction & 0x3FF; + uint32_t imm11 = (instruction >> 16) & 0x7FF; + uint32_t i1 = (j1 == s); + uint32_t i2 = (j2 == s); + uint32_t dis = + (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1); + int32_t sdis = dis; + int32_t result = s ? 
(sdis | 0xFE000000) : sdis; + if (is_blx && (instrAddr & 0x2)) { + // The thumb blx instruction always has low bit of imm11 as zero. The way + // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that + // the blx instruction always 4-byte aligns the pc before adding the + // displacement from the blx. We must emulate that when decoding this. + result -= 2; + } + return result; +} + +/// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed. +uint32_t ArchHandler_arm::setDisplacementInThumbBranch(uint32_t instruction, + uint32_t instrAddr, + int32_t displacement, + bool targetIsThumb) { + assert((displacement <= 16777214) && (displacement > (-16777216)) + && "thumb branch out of range"); + bool is_bl = ((instruction & 0xD000F800) == 0xD000F000); + bool is_blx = ((instruction & 0xD000F800) == 0xC000F000); + bool is_b = ((instruction & 0xD000F800) == 0x9000F000); + uint32_t newInstruction = (instruction & 0xD000F800); + if (is_bl || is_blx) { + if (targetIsThumb) { + newInstruction = 0xD000F000; // Use bl + } else { + newInstruction = 0xC000F000; // Use blx + // See note in getDisplacementFromThumbBranch() about blx. 
+ if (instrAddr & 0x2) + displacement += 2; + } + } else if (is_b) { + assert(targetIsThumb && "no pc-rel thumb branch instruction that " + "switches to arm mode"); + } + else { + llvm_unreachable("thumb branch22 reloc on a non-branch instruction"); + } + uint32_t s = (uint32_t)(displacement >> 24) & 0x1; + uint32_t i1 = (uint32_t)(displacement >> 23) & 0x1; + uint32_t i2 = (uint32_t)(displacement >> 22) & 0x1; + uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF; + uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF; + uint32_t j1 = (i1 == s); + uint32_t j2 = (i2 == s); + uint32_t nextDisp = (j1 << 13) | (j2 << 11) | imm11; + uint32_t firstDisp = (s << 10) | imm10; + newInstruction |= (nextDisp << 16) | firstDisp; + return newInstruction; +} + +bool ArchHandler_arm::isThumbMovw(uint32_t instruction) { + return (instruction & 0x8000FBF0) == 0x0000F240; +} + +bool ArchHandler_arm::isThumbMovt(uint32_t instruction) { + return (instruction & 0x8000FBF0) == 0x0000F2C0; +} + +bool ArchHandler_arm::isArmMovw(uint32_t instruction) { + return (instruction & 0x0FF00000) == 0x03000000; +} + +bool ArchHandler_arm::isArmMovt(uint32_t instruction) { + return (instruction & 0x0FF00000) == 0x03400000; +} + +uint16_t ArchHandler_arm::getWordFromThumbMov(uint32_t instruction) { + assert(isThumbMovw(instruction) || isThumbMovt(instruction)); + uint32_t i = ((instruction & 0x00000400) >> 10); + uint32_t imm4 = (instruction & 0x0000000F); + uint32_t imm3 = ((instruction & 0x70000000) >> 28); + uint32_t imm8 = ((instruction & 0x00FF0000) >> 16); + return (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8; +} + +uint16_t ArchHandler_arm::getWordFromArmMov(uint32_t instruction) { + assert(isArmMovw(instruction) || isArmMovt(instruction)); + uint32_t imm4 = ((instruction & 0x000F0000) >> 16); + uint32_t imm12 = (instruction & 0x00000FFF); + return (imm4 << 12) | imm12; +} + +uint32_t ArchHandler_arm::setWordFromThumbMov(uint32_t instr, uint16_t word) { + assert(isThumbMovw(instr) || 
isThumbMovt(instr)); + uint32_t imm4 = (word & 0xF000) >> 12; + uint32_t i = (word & 0x0800) >> 11; + uint32_t imm3 = (word & 0x0700) >> 8; + uint32_t imm8 = word & 0x00FF; + return (instr & 0x8F00FBF0) | imm4 | (i << 10) | (imm3 << 28) | (imm8 << 16); +} + +uint32_t ArchHandler_arm::setWordFromArmMov(uint32_t instr, uint16_t word) { + assert(isArmMovw(instr) || isArmMovt(instr)); + uint32_t imm4 = (word & 0xF000) >> 12; + uint32_t imm12 = word & 0x0FFF; + return (instr & 0xFFF0F000) | (imm4 << 16) | imm12; +} + +uint32_t ArchHandler_arm::clearThumbBit(uint32_t value, const Atom *target) { + // The assembler often adds one to the address of a thumb function. + // We need to undo that so it does not look like an addend. + if (value & 1) { + if (isa<DefinedAtom>(target)) { + const MachODefinedAtom *machoTarget = + reinterpret_cast<const MachODefinedAtom *>(target); + if (machoTarget->isThumb()) + value &= -2; // mask off thumb-bit + } + } + return value; +} + +llvm::Error ArchHandler_arm::getReferenceInfo( + const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + uint32_t instruction = *(const ulittle32_t *)fixupContent; + int32_t displacement; + switch (relocPattern(reloc)) { + case ARM_THUMB_RELOC_BR22 | rPcRel | rExtern | rLength4: + // ex: bl _foo (and _foo is undefined) + if ((instruction & 0xD000F800) == 0x9000F000) + *kind = thumb_b22; + else + *kind = thumb_bl22; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + // Instruction contains branch to addend. 
+ displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); + *addend = fixupAddress + 4 + displacement; + return llvm::Error::success(); + case ARM_THUMB_RELOC_BR22 | rPcRel | rLength4: + // ex: bl _foo (and _foo is defined) + if ((instruction & 0xD000F800) == 0x9000F000) + *kind = thumb_b22; + else + *kind = thumb_bl22; + displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); + targetAddress = fixupAddress + 4 + displacement; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4: + // ex: bl _foo+4 (and _foo is defined) + if ((instruction & 0xD000F800) == 0x9000F000) + *kind = thumb_b22; + else + *kind = thumb_bl22; + displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); + targetAddress = fixupAddress + 4 + displacement; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + // reloc.value is target atom's address. Instruction contains branch + // to atom+addend. + *addend += (targetAddress - reloc.value); + return llvm::Error::success(); + case ARM_RELOC_BR24 | rPcRel | rExtern | rLength4: + // ex: bl _foo (and _foo is undefined) + if (((instruction & 0x0F000000) == 0x0A000000) + && ((instruction & 0xF0000000) != 0xF0000000)) + *kind = arm_b24; + else + *kind = arm_bl24; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + // Instruction contains branch to addend. 
+ displacement = getDisplacementFromArmBranch(instruction); + *addend = fixupAddress + 8 + displacement; + return llvm::Error::success(); + case ARM_RELOC_BR24 | rPcRel | rLength4: + // ex: bl _foo (and _foo is defined) + if (((instruction & 0x0F000000) == 0x0A000000) + && ((instruction & 0xF0000000) != 0xF0000000)) + *kind = arm_b24; + else + *kind = arm_bl24; + displacement = getDisplacementFromArmBranch(instruction); + targetAddress = fixupAddress + 8 + displacement; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ARM_RELOC_BR24 | rScattered | rPcRel | rLength4: + // ex: bl _foo+4 (and _foo is defined) + if (((instruction & 0x0F000000) == 0x0A000000) + && ((instruction & 0xF0000000) != 0xF0000000)) + *kind = arm_b24; + else + *kind = arm_bl24; + displacement = getDisplacementFromArmBranch(instruction); + targetAddress = fixupAddress + 8 + displacement; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + // reloc.value is target atom's address. Instruction contains branch + // to atom+addend. 
+ *addend += (targetAddress - reloc.value); + return llvm::Error::success(); + case ARM_RELOC_VANILLA | rExtern | rLength4: + // ex: .long _foo (and _foo is undefined) + *kind = pointer32; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = instruction; + return llvm::Error::success(); + case ARM_RELOC_VANILLA | rLength4: + // ex: .long _foo (and _foo is defined) + *kind = pointer32; + if (auto ec = atomFromAddress(reloc.symbol, instruction, target, addend)) + return ec; + *addend = clearThumbBit((uint32_t) * addend, *target); + return llvm::Error::success(); + case ARM_RELOC_VANILLA | rScattered | rLength4: + // ex: .long _foo+a (and _foo is defined) + *kind = pointer32; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend += (clearThumbBit(instruction, *target) - reloc.value); + return llvm::Error::success(); + default: + return llvm::make_error<GenericError>("unsupported arm relocation type"); + } + return llvm::Error::success(); +} + +llvm::Error +ArchHandler_arm::getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + bool scatterable, + FindAtomBySectionAndAddress atomFromAddr, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + bool pointerDiff = false; + bool funcRel; + bool top; + bool thumbReloc; + switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rScattered | rLenThmbLo): + // ex: movw r1, :lower16:(_x-L1) [thumb mode] + *kind = thumb_movw_funcRel; + funcRel = true; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rScattered | rLenThmbHi): + // ex: movt r1, :upper16:(_x-L1) [thumb mode] + *kind = 
thumb_movt_funcRel; + funcRel = true; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rScattered | rLenArmLo): + // ex: movw r1, :lower16:(_x-L1) [arm mode] + *kind = arm_movw_funcRel; + funcRel = true; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rScattered | rLenArmHi): + // ex: movt r1, :upper16:(_x-L1) [arm mode] + *kind = arm_movt_funcRel; + funcRel = true; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rLenThmbLo): + // ex: movw r1, :lower16:_x [thumb mode] + *kind = thumb_movw; + funcRel = false; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rLenThmbHi): + // ex: movt r1, :upper16:_x [thumb mode] + *kind = thumb_movt; + funcRel = false; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rLenArmLo): + // ex: movw r1, :lower16:_x [arm mode] + *kind = arm_movw; + funcRel = false; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rLenArmHi): + // ex: movt r1, :upper16:_x [arm mode] + *kind = arm_movt; + funcRel = false; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rScattered | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rLenThmbLo): + // ex: movw r1, :lower16:_x+a [thumb mode] + *kind = thumb_movw; + funcRel = false; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rScattered | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rLenThmbHi): + // ex: movt r1, :upper16:_x+a [thumb mode] + *kind = thumb_movt; + funcRel = false; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rScattered | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rLenArmLo): + // ex: movw r1, :lower16:_x+a [arm mode] + *kind = arm_movw; + funcRel 
= false; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rScattered | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rLenArmHi): + // ex: movt r1, :upper16:_x+a [arm mode] + *kind = arm_movt; + funcRel = false; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rExtern | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rLenThmbLo): + // ex: movw r1, :lower16:_undef [thumb mode] + *kind = thumb_movw; + funcRel = false; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rExtern | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rLenThmbHi): + // ex: movt r1, :upper16:_undef [thumb mode] + *kind = thumb_movt; + funcRel = false; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rExtern | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rLenArmLo): + // ex: movw r1, :lower16:_undef [arm mode] + *kind = arm_movw; + funcRel = false; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rExtern | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rLenArmHi): + // ex: movt r1, :upper16:_undef [arm mode] + *kind = arm_movt; + funcRel = false; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_SECTDIFF | rScattered | rLength4) << 16 | + ARM_RELOC_PAIR | rScattered | rLength4): + case ((ARM_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 | + ARM_RELOC_PAIR | rScattered | rLength4): + // ex: .long _foo - . 
+ pointerDiff = true; + break; + default: + return llvm::make_error<GenericError>("unsupported arm relocation pair"); + } + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint32_t instruction = *(const ulittle32_t *)fixupContent; + uint32_t value; + uint32_t fromAddress; + uint32_t toAddress; + uint16_t instruction16; + uint16_t other16; + const lld::Atom *fromTarget; + Reference::Addend offsetInTo; + Reference::Addend offsetInFrom; + if (pointerDiff) { + toAddress = reloc1.value; + fromAddress = reloc2.value; + if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) + return ec; + if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) + return ec; + if (scatterable && (fromTarget != inAtom)) + return llvm::make_error<GenericError>( + "SECTDIFF relocation where subtrahend label is not in atom"); + *kind = delta32; + value = clearThumbBit(instruction, *target); + *addend = (int32_t)(value - (toAddress - fixupAddress)); + } else if (funcRel) { + toAddress = reloc1.value; + fromAddress = reloc2.value; + if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) + return ec; + if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) + return ec; + if (fromTarget != inAtom) + return llvm::make_error<GenericError>("ARM_RELOC_HALF_SECTDIFF relocation" + " where subtrahend label is not in atom"); + other16 = (reloc2.offset & 0xFFFF); + if (thumbReloc) { + if (top) { + if (!isThumbMovt(instruction)) + return llvm::make_error<GenericError>("expected movt instruction"); + } + else { + if (!isThumbMovw(instruction)) + return llvm::make_error<GenericError>("expected movw instruction"); + } + instruction16 = getWordFromThumbMov(instruction); + } + else { + if (top) { + if (!isArmMovt(instruction)) + return llvm::make_error<GenericError>("expected movt instruction"); + } + else { + if (!isArmMovw(instruction)) + return llvm::make_error<GenericError>("expected movw instruction"); + } + instruction16 = 
getWordFromArmMov(instruction); + } + if (top) + value = (instruction16 << 16) | other16; + else + value = (other16 << 16) | instruction16; + value = clearThumbBit(value, *target); + int64_t ta = (int64_t) value - (toAddress - fromAddress); + *addend = ta - offsetInFrom; + return llvm::Error::success(); + } else { + uint32_t sectIndex; + if (thumbReloc) { + if (top) { + if (!isThumbMovt(instruction)) + return llvm::make_error<GenericError>("expected movt instruction"); + } + else { + if (!isThumbMovw(instruction)) + return llvm::make_error<GenericError>("expected movw instruction"); + } + instruction16 = getWordFromThumbMov(instruction); + } + else { + if (top) { + if (!isArmMovt(instruction)) + return llvm::make_error<GenericError>("expected movt instruction"); + } + else { + if (!isArmMovw(instruction)) + return llvm::make_error<GenericError>("expected movw instruction"); + } + instruction16 = getWordFromArmMov(instruction); + } + other16 = (reloc2.offset & 0xFFFF); + if (top) + value = (instruction16 << 16) | other16; + else + value = (other16 << 16) | instruction16; + if (reloc1.isExtern) { + if (auto ec = atomFromSymbolIndex(reloc1.symbol, target)) + return ec; + *addend = value; + } else { + if (reloc1.scattered) { + toAddress = reloc1.value; + sectIndex = 0; + } else { + toAddress = value; + sectIndex = reloc1.symbol; + } + if (auto ec = atomFromAddr(sectIndex, toAddress, target, &offsetInTo)) + return ec; + *addend = value - toAddress; + } + } + + return llvm::Error::success(); +} + +void ArchHandler_arm::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool &thumbMode, bool targetIsThumb) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + int32_t displacement; + uint16_t value16; + uint32_t value32; + switch 
(static_cast<ArmKind>(ref.kindValue())) { + case modeThumbCode: + thumbMode = true; + break; + case modeArmCode: + thumbMode = false; + break; + case modeData: + break; + case thumb_b22: + case thumb_bl22: + assert(thumbMode); + displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); + value32 = setDisplacementInThumbBranch(*loc32, fixupAddress, + displacement, targetIsThumb); + *loc32 = value32; + break; + case thumb_movw: + assert(thumbMode); + value16 = (targetAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt: + assert(thumbMode); + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movw_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case arm_b24: + case arm_bl24: + assert(!thumbMode); + displacement = (targetAddress - (fixupAddress + 8)) + ref.addend(); + value32 = setDisplacementInArmBranch(*loc32, displacement, targetIsThumb); + *loc32 = value32; + break; + case arm_movw: + assert(!thumbMode); + value16 = (targetAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt: + assert(!thumbMode); + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movw_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt_funcRel: + assert(!thumbMode); + value16 = (targetAddress - 
inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case pointer32: + if (targetIsThumb) + *loc32 = targetAddress + ref.addend() + 1; + else + *loc32 = targetAddress + ref.addend(); + break; + case delta32: + if (targetIsThumb) + *loc32 = targetAddress - fixupAddress + ref.addend() + 1; + else + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case lazyPointer: + // do nothing + break; + case lazyImmediateLocation: + *loc32 = ref.addend(); + break; + case invalid: + llvm_unreachable("invalid ARM Reference Kind"); + break; + } +} + +void ArchHandler_arm::generateAtomContent(const DefinedAtom &atom, + bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. + bool thumbMode = false; + for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + uint64_t targetAddress = 0; + bool targetIsThumb = false; + if (const DefinedAtom *defTarg = dyn_cast<DefinedAtom>(target)) { + targetAddress = findAddress(*target); + targetIsThumb = isThumbFunction(*defTarg); + } + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, thumbMode, + targetIsThumb); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, thumbMode, targetIsThumb); + } + } +} + +bool ArchHandler_arm::useExternalRelocationTo(const Atom &target) { + // Undefined symbols are referenced via external relocations. 
+ if (isa<UndefinedAtom>(&target)) + return true; + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(&target)) { + switch (defAtom->merge()) { + case DefinedAtom::mergeAsTentative: + // Tentative definitions are referenced via external relocations. + return true; + case DefinedAtom::mergeAsWeak: + case DefinedAtom::mergeAsWeakAndAddressUsed: + // Global weak-defs are referenced via external relocations. + return (defAtom->scope() == DefinedAtom::scopeGlobal); + default: + break; + } + } + // Everything else is reference via an internal relocation. + return false; +} + +void ArchHandler_arm::applyFixupRelocatable(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool &thumbMode, + bool targetIsThumb) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + bool useExternalReloc = useExternalRelocationTo(*ref.target()); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + int32_t displacement; + uint16_t value16; + uint32_t value32; + bool targetIsUndef = isa<UndefinedAtom>(ref.target()); + switch (static_cast<ArmKind>(ref.kindValue())) { + case modeThumbCode: + thumbMode = true; + break; + case modeArmCode: + thumbMode = false; + break; + case modeData: + break; + case thumb_b22: + case thumb_bl22: + assert(thumbMode); + if (useExternalReloc) + displacement = (ref.addend() - (fixupAddress + 4)); + else + displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); + value32 = setDisplacementInThumbBranch(*loc32, fixupAddress, + displacement, + targetIsUndef || targetIsThumb); + *loc32 = value32; + break; + case thumb_movw: + assert(thumbMode); + if (useExternalReloc) + value16 = ref.addend() & 0xFFFF; + else + value16 = (targetAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt: + assert(thumbMode); + if (useExternalReloc) + value16 = ref.addend() >> 
16; + else + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movw_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case arm_b24: + case arm_bl24: + assert(!thumbMode); + if (useExternalReloc) + displacement = (ref.addend() - (fixupAddress + 8)); + else + displacement = (targetAddress - (fixupAddress + 8)) + ref.addend(); + value32 = setDisplacementInArmBranch(*loc32, displacement, + targetIsThumb); + *loc32 = value32; + break; + case arm_movw: + assert(!thumbMode); + if (useExternalReloc) + value16 = ref.addend() & 0xFFFF; + else + value16 = (targetAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt: + assert(!thumbMode); + if (useExternalReloc) + value16 = ref.addend() >> 16; + else + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movw_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case pointer32: + *loc32 = targetAddress + ref.addend(); + break; + case delta32: + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case lazyPointer: + case lazyImmediateLocation: + // do nothing + break; + case invalid: + llvm_unreachable("invalid ARM Reference Kind"); + break; + } +} + +void ArchHandler_arm::appendSectionRelocations( + const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + 
FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + bool useExternalReloc = useExternalRelocationTo(*ref.target()); + uint32_t targetAtomAddress; + uint32_t fromAtomAddress; + uint16_t other16; + switch (static_cast<ArmKind>(ref.kindValue())) { + case modeThumbCode: + case modeArmCode: + case modeData: + // Do nothing. + break; + case thumb_b22: + case thumb_bl22: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_THUMB_RELOC_BR22 | rExtern | rPcRel | rLength4); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_THUMB_RELOC_BR22 | rPcRel | rLength4); + } + break; + case thumb_movw: + if (useExternalReloc) { + other16 = ref.addend() >> 16; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenThmbLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbLo); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenThmbLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbLo); + } else { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenThmbLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbLo); + } + } + break; + case thumb_movt: + if 
(useExternalReloc) { + other16 = ref.addend() & 0xFFFF; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenThmbHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbHi); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenThmbHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbHi); + } else { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenThmbHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbHi); + } + } + break; + case thumb_movw_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenThmbLo); + break; + case thumb_movt_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenThmbHi); + break; + case arm_b24: + case arm_bl24: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_BR24 | rExtern | rPcRel | rLength4); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_RELOC_BR24 | rScattered | rPcRel | rLength4); + else 
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_BR24 | rPcRel | rLength4); + } + break; + case arm_movw: + if (useExternalReloc) { + other16 = ref.addend() >> 16; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenArmLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmLo); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenArmLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmLo); + } else { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenArmLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmLo); + } + } + break; + case arm_movt: + if (useExternalReloc) { + other16 = ref.addend() & 0xFFFF; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenArmHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmHi); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenArmHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmHi); + } else { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenArmHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmHi); + } + } + break; + case arm_movw_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16; + 
appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenArmLo); + break; + case arm_movt_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenArmHi); + break; + case pointer32: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_VANILLA | rExtern | rLength4); + } + else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_RELOC_VANILLA | rScattered | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_VANILLA | rLength4); + } + break; + case delta32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + + ref.offsetInAtom(), + ARM_RELOC_PAIR | rScattered | rLength4); + break; + case lazyPointer: + case lazyImmediateLocation: + // do nothing + break; + case invalid: + llvm_unreachable("invalid ARM Reference Kind"); + break; + } +} + +void ArchHandler_arm::addAdditionalReferences(MachODefinedAtom &atom) { + if (atom.isThumb()) { + atom.addReference(Reference::KindNamespace::mach_o, + Reference::KindArch::ARM, modeThumbCode, 0, &atom, 0); + } +} + +bool ArchHandler_arm::isThumbFunction(const DefinedAtom &atom) { + for (const Reference *ref : atom) { + if (ref->offsetInAtom() != 0) + return false; + if (ref->kindNamespace() != Reference::KindNamespace::mach_o) + continue; + assert(ref->kindArch() == Reference::KindArch::ARM); 
+ if (ref->kindValue() == modeThumbCode) + return true; + } + return false; +} + +class Thumb2ToArmShimAtom : public SimpleDefinedAtom { +public: + Thumb2ToArmShimAtom(MachOFile &file, StringRef targetName, + const DefinedAtom &target) + : SimpleDefinedAtom(file) { + addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, + ArchHandler_arm::modeThumbCode, 0, this, 0); + addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, + ArchHandler_arm::delta32, 8, &target, 0); + std::string name = std::string(targetName) + "$shim"; + StringRef tmp(name); + _name = tmp.copy(file.allocator()); + } + + ~Thumb2ToArmShimAtom() override = default; + + StringRef name() const override { + return _name; + } + + ContentType contentType() const override { + return DefinedAtom::typeCode; + } + + Alignment alignment() const override { return 4; } + + uint64_t size() const override { + return 12; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t bytes[] = + { 0xDF, 0xF8, 0x04, 0xC0, // ldr ip, pc + 4 + 0xFF, 0x44, // add ip, pc, ip + 0x60, 0x47, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00 }; // .long target - this + assert(sizeof(bytes) == size()); + return llvm::makeArrayRef(bytes, sizeof(bytes)); + } +private: + StringRef _name; +}; + +class ArmToThumbShimAtom : public SimpleDefinedAtom { +public: + ArmToThumbShimAtom(MachOFile &file, StringRef targetName, + const DefinedAtom &target) + : SimpleDefinedAtom(file) { + addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, + ArchHandler_arm::delta32, 12, &target, 0); + std::string name = std::string(targetName) + "$shim"; + StringRef tmp(name); + _name = tmp.copy(file.allocator()); + } + + ~ArmToThumbShimAtom() override = default; + + StringRef name() const override { + return _name; + } + + ContentType contentType() const override { + return DefinedAtom::typeCode; + } + + 
Alignment alignment() const override { return 4; } + + uint64_t size() const override { + return 16; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t bytes[] = + { 0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 4 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x1C, 0xFF, 0x2F, 0xE1, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00 }; // .long target - this + assert(sizeof(bytes) == size()); + return llvm::makeArrayRef(bytes, sizeof(bytes)); + } +private: + StringRef _name; +}; + +const DefinedAtom *ArchHandler_arm::createShim(MachOFile &file, + bool thumbToArm, + const DefinedAtom &target) { + bool isStub = (target.contentType() == DefinedAtom::typeStub); + StringRef targetName = isStub ? stubName(target) : target.name(); + if (thumbToArm) + return new (file.allocator()) Thumb2ToArmShimAtom(file, targetName, target); + else + return new (file.allocator()) ArmToThumbShimAtom(file, targetName, target); +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_arm() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_arm()); +} + +} // namespace mach_o +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp new file mode 100644 index 000000000000..392a1be5b3d0 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp @@ -0,0 +1,898 @@ +//===- lib/FileFormat/MachO/ArchHandler_arm64.cpp -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::ulittle64_t; + +using llvm::support::little32_t; +using llvm::support::little64_t; + +class ArchHandler_arm64 : public ArchHandler { +public: + ArchHandler_arm64() = default; + ~ArchHandler_arm64() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { + return Reference::KindArch::AArch64; + } + + /// Used by GOTPass to locate GOT References + bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + switch (ref.kindValue()) { + case gotPage21: + case gotOffset12: + canBypassGOT = true; + return true; + case delta32ToGOT: + canBypassGOT = false; + return true; + case unwindCIEToPersonalityFunction: + canBypassGOT = false; + return true; + case imageOffsetGot: + canBypassGOT = false; + return true; + default: + return false; + } + } + + /// Used by GOTPass to update GOT References. 
+ void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override { + // If GOT slot was instantiated, transform: + // gotPage21/gotOffset12 -> page21/offset12scale8 + // If GOT slot optimized away, transform: + // gotPage21/gotOffset12 -> page21/addOffset12 + assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref->kindArch() == Reference::KindArch::AArch64); + switch (ref->kindValue()) { + case gotPage21: + const_cast<Reference *>(ref)->setKindValue(page21); + break; + case gotOffset12: + const_cast<Reference *>(ref)->setKindValue(targetNowGOT ? + offset12scale8 : addOffset12); + break; + case delta32ToGOT: + const_cast<Reference *>(ref)->setKindValue(delta32); + break; + case imageOffsetGot: + const_cast<Reference *>(ref)->setKindValue(imageOffset); + break; + default: + llvm_unreachable("Not a GOT reference"); + } + } + + const StubInfo &stubInfo() override { return _sStubInfo; } + + bool isCallSite(const Reference &) override; + bool isNonCallBranch(const Reference &) override { + return false; + } + + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + + bool needsCompactUnwind() override { + return true; + } + Reference::KindValue imageOffsetKind() override { + return imageOffset; + } + Reference::KindValue imageOffsetKindIndirect() override { + return imageOffsetGot; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return unwindCIEToPersonalityFunction; + } + + Reference::KindValue unwindRefToCIEKind() override { + return negDelta32; + } + + Reference::KindValue unwindRefToFunctionKind() override { + return unwindFDEToFunction; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return unwindInfoToEhFrame; + } + + Reference::KindValue pointerKind() override { + return pointer64; + } + + uint32_t dwarfCompactUnwindType() override { + return 0x03000000; + } + + llvm::Error getReferenceInfo(const normalized::Relocation
&reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override { + return (atom->contentType() == DefinedAtom::typeCString); + } + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) override; + +private: + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfo; + + enum Arm64Kind : Reference::KindValue { + invalid, /// for error condition + + // Kinds found in mach-o .o files: + branch26, /// ex: bl _foo + page21, /// ex: adrp x1, _foo@PAGE + offset12, /// ex: ldrb w0, [x1, _foo@PAGEOFF] + offset12scale2, /// ex: ldrs w0, [x1, _foo@PAGEOFF] + offset12scale4, /// ex: ldr w0, [x1, _foo@PAGEOFF] + offset12scale8, /// ex: ldr x0, [x1, _foo@PAGEOFF] + offset12scale16, /// ex: ldr q0, [x1, _foo@PAGEOFF] + gotPage21, /// ex: adrp x1, _foo@GOTPAGE + gotOffset12, /// ex: ldr w0, [x1, 
_foo@GOTPAGEOFF] + tlvPage21, /// ex: adrp x1, _foo@TLVPAGE + tlvOffset12, /// ex: ldr w0, [x1, _foo@TLVPAGEOFF] + + pointer64, /// ex: .quad _foo + delta64, /// ex: .quad _foo - . + delta32, /// ex: .long _foo - . + negDelta32, /// ex: .long . - _foo + pointer64ToGOT, /// ex: .quad _foo@GOT + delta32ToGOT, /// ex: .long _foo@GOT - . + + // Kinds introduced by Passes: + addOffset12, /// Location contains LDR to change into ADD. + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + imageOffset, /// Location contains offset of atom in final image + imageOffsetGot, /// Location contains offset of GOT entry for atom in + /// final image (typically personality function). + unwindCIEToPersonalityFunction, /// Nearly delta32ToGOT, but cannot be + /// rematerialized in relocatable object + /// (yay for implicit contracts!). + unwindFDEToFunction, /// Nearly delta64, but cannot be rematerialized in + /// relocatable object (yay for implicit contracts!). + unwindInfoToEhFrame, /// Fix low 24 bits of compact unwind encoding to + /// refer to __eh_frame entry. + }; + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, bool targetUnnamed); + + // Utility functions for inspecting/updating instructions. 
+ static uint32_t setDisplacementInBranch26(uint32_t instr, int32_t disp); + static uint32_t setDisplacementInADRP(uint32_t instr, int64_t disp); + static Arm64Kind offset12KindFromInstruction(uint32_t instr); + static uint32_t setImm12(uint32_t instr, uint32_t offset); +}; + +const Registry::KindStrings ArchHandler_arm64::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(branch26), + LLD_KIND_STRING_ENTRY(page21), + LLD_KIND_STRING_ENTRY(offset12), + LLD_KIND_STRING_ENTRY(offset12scale2), + LLD_KIND_STRING_ENTRY(offset12scale4), + LLD_KIND_STRING_ENTRY(offset12scale8), + LLD_KIND_STRING_ENTRY(offset12scale16), + LLD_KIND_STRING_ENTRY(gotPage21), + LLD_KIND_STRING_ENTRY(gotOffset12), + LLD_KIND_STRING_ENTRY(tlvPage21), + LLD_KIND_STRING_ENTRY(tlvOffset12), + LLD_KIND_STRING_ENTRY(pointer64), + LLD_KIND_STRING_ENTRY(delta64), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(negDelta32), + LLD_KIND_STRING_ENTRY(pointer64ToGOT), + LLD_KIND_STRING_ENTRY(delta32ToGOT), + + LLD_KIND_STRING_ENTRY(addOffset12), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_ENTRY(imageOffset), + LLD_KIND_STRING_ENTRY(imageOffsetGot), + LLD_KIND_STRING_ENTRY(unwindCIEToPersonalityFunction), + LLD_KIND_STRING_ENTRY(unwindFDEToFunction), + LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame), + + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_arm64::_sStubInfo = { + "dyld_stub_binder", + + // Lazy pointer references + { Reference::KindArch::AArch64, pointer64, 0, 0 }, + { Reference::KindArch::AArch64, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::AArch64, pointer64, 0, 0 }, + + // arm64 code alignment 2^1 + 1, + + // Stub size and code + 12, + { 0x10, 0x00, 0x00, 0x90, // ADRP X16, lazy_pointer@page + 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16, lazy_pointer@pageoff] + 0x00, 0x02, 0x1F, 0xD6 }, // BR X16 + { Reference::KindArch::AArch64, page21, 0, 0 }, + 
{ true, offset12scale8, 4, 0 }, + + // Stub Helper size and code + 12, + { 0x50, 0x00, 0x00, 0x18, // LDR W16, L0 + 0x00, 0x00, 0x00, 0x14, // B helperhelper + 0x00, 0x00, 0x00, 0x00 }, // L0: .long 0 + { Reference::KindArch::AArch64, lazyImmediateLocation, 8, 0 }, + { Reference::KindArch::AArch64, branch26, 4, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeGOT, + + // Stub Helper-Common size and code + 24, + // Stub helper alignment + 2, + { 0x11, 0x00, 0x00, 0x90, // ADRP X17, dyld_ImageLoaderCache@page + 0x31, 0x02, 0x00, 0x91, // ADD X17, X17, dyld_ImageLoaderCache@pageoff + 0xF0, 0x47, 0xBF, 0xA9, // STP X16/X17, [SP, #-16]! + 0x10, 0x00, 0x00, 0x90, // ADRP X16, _fast_lazy_bind@page + 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16,_fast_lazy_bind@pageoff] + 0x00, 0x02, 0x1F, 0xD6 }, // BR X16 + { Reference::KindArch::AArch64, page21, 0, 0 }, + { true, offset12, 4, 0 }, + { Reference::KindArch::AArch64, page21, 12, 0 }, + { true, offset12scale8, 16, 0 } +}; + +bool ArchHandler_arm64::isCallSite(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + return (ref.kindValue() == branch26); +} + +bool ArchHandler_arm64::isPointer(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + Reference::KindValue kind = ref.kindValue(); + return (kind == pointer64); +} + +bool ArchHandler_arm64::isPairedReloc(const Relocation &r) { + return ((r.type == ARM64_RELOC_ADDEND) || (r.type == ARM64_RELOC_SUBTRACTOR)); +} + +uint32_t ArchHandler_arm64::setDisplacementInBranch26(uint32_t instr, + int32_t displacement) { + assert((displacement <= 134217727) && (displacement > (-134217728)) && + "arm64 branch out of range"); + return (instr & 0xFC000000) | ((uint32_t)(displacement >> 2) & 0x03FFFFFF); +} + +uint32_t
ArchHandler_arm64::setDisplacementInADRP(uint32_t instruction, + int64_t displacement) { + assert((displacement <= 0x100000000LL) && (displacement > (-0x100000000LL)) && + "arm64 ADRP out of range"); + assert(((instruction & 0x9F000000) == 0x90000000) && + "reloc not on ADRP instruction"); + uint32_t immhi = (displacement >> 9) & (0x00FFFFE0); + uint32_t immlo = (displacement << 17) & (0x60000000); + return (instruction & 0x9F00001F) | immlo | immhi; +} + +ArchHandler_arm64::Arm64Kind +ArchHandler_arm64::offset12KindFromInstruction(uint32_t instruction) { + if (instruction & 0x08000000) { + switch ((instruction >> 30) & 0x3) { + case 0: + if ((instruction & 0x04800000) == 0x04800000) + return offset12scale16; + return offset12; + case 1: + return offset12scale2; + case 2: + return offset12scale4; + case 3: + return offset12scale8; + } + } + return offset12; +} + +uint32_t ArchHandler_arm64::setImm12(uint32_t instruction, uint32_t offset) { + assert(((offset & 0xFFFFF000) == 0) && "imm12 offset out of range"); + uint32_t imm12 = offset << 10; + return (instruction & 0xFFC003FF) | imm12; +} + +llvm::Error ArchHandler_arm64::getReferenceInfo( + const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + switch (relocPattern(reloc)) { + case ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4: + // ex: bl _foo + *kind = branch26; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@PAGE + *kind = page21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case 
ARM64_RELOC_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@PAGEOFF] + *kind = offset12KindFromInstruction(*(const little32_t *)fixupContent); + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@GOTPAGE + *kind = gotPage21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@GOTPAGEOFF] + *kind = gotOffset12; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@TLVPAGE + *kind = tlvPage21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@TLVPAGEOFF] + *kind = tlvOffset12; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_UNSIGNED | rExtern | rLength8: + // ex: .quad _foo + N + *kind = pointer64; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little64_t *)fixupContent; + return llvm::Error::success(); + case ARM64_RELOC_UNSIGNED | rLength8: + // ex: .quad Lfoo + N + *kind = pointer64; + return atomFromAddress(reloc.symbol, *(const little64_t *)fixupContent, + target, addend); + case ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8: + // ex: .quad _foo@GOT + *kind = pointer64ToGOT; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4: + // ex: .long _foo@GOT - . 
+ + // If we are in an .eh_frame section, then the kind of the relocation should + // not be delta32ToGOT. It may instead be unwindCIEToPersonalityFunction. + if (inAtom->contentType() == DefinedAtom::typeCFI) + *kind = unwindCIEToPersonalityFunction; + else + *kind = delta32ToGOT; + + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + default: + return llvm::make_error<GenericError>("unsupported arm64 relocation type"); + } +} + +llvm::Error ArchHandler_arm64::getPairReferenceInfo( + const normalized::Relocation &reloc1, const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, uint32_t offsetInAtom, uint64_t fixupAddress, + bool swap, bool scatterable, FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4): + // ex: bl _foo+8 + *kind = branch26; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return llvm::Error::success(); + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4): + // ex: adrp x1, _foo@PAGE + *kind = page21; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return llvm::Error::success(); + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4): { + // ex: ldr w0, [x1, _foo@PAGEOFF] + uint32_t cont32 = (int32_t)*(const little32_t *)fixupContent; + *kind = offset12KindFromInstruction(cont32); + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return llvm::Error::success(); + } + case 
((ARM64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + ARM64_RELOC_UNSIGNED | rExtern | rLength8): + // ex: .quad _foo - . + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + + // If we are in an .eh_frame section, then the kind of the relocation should + // not be delta64. It may instead be unwindFDEToFunction. + if (inAtom->contentType() == DefinedAtom::typeCFI) + *kind = unwindFDEToFunction; + else + *kind = delta64; + + // The offsets of the 2 relocations must match + if (reloc1.offset != reloc2.offset) + return llvm::make_error<GenericError>( + "paired relocs must have the same offset"); + *addend = (int64_t)*(const little64_t *)fixupContent + offsetInAtom; + return llvm::Error::success(); + case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + ARM64_RELOC_UNSIGNED | rExtern | rLength4): + // ex: .long _foo - . + *kind = delta32; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + offsetInAtom; + return llvm::Error::success(); + default: + return llvm::make_error<GenericError>("unsupported arm64 relocation pair"); + } +} + +void ArchHandler_arm64::generateAtomContent( + const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups.
+#ifndef NDEBUG + if (atom.begin() != atom.end()) { + DEBUG_WITH_TYPE("atom-content", llvm::dbgs() + << "Applying fixups to atom:\n" + << " address=" + << llvm::format(" 0x%09lX", &atom) + << ", file=#" + << atom.file().ordinal() + << ", atom=#" + << atom.ordinal() + << ", name=" + << atom.name() + << ", type=" + << atom.contentType() + << "\n"); + } +#endif + for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + bool targetUnnamed = target->name().empty(); + uint64_t targetAddress = 0; + if (isa<DefinedAtom>(target)) + targetAddress = findAddress(*target); + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, targetUnnamed); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, imageBaseAddress, + findSectionAddress); + } + } +} + +void ArchHandler_arm64::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + int32_t displacement; + uint32_t instruction; + uint32_t value32; + uint32_t value64; + switch (static_cast<Arm64Kind>(ref.kindValue())) { + case branch26: + displacement = (targetAddress - fixupAddress) + ref.addend(); + *loc32 = setDisplacementInBranch26(*loc32, displacement); + return; + case page21: + case gotPage21: + case tlvPage21: + displacement = + ((targetAddress + ref.addend()) & (-4096)) - (fixupAddress & (-4096)); + *loc32 = setDisplacementInADRP(*loc32, displacement); + return; + 
case offset12: + case gotOffset12: + case tlvOffset12: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + *loc32 = setImm12(*loc32, displacement); + return; + case offset12scale2: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x1) == 0) && + "scaled imm12 not accessing 2-byte aligneds"); + *loc32 = setImm12(*loc32, displacement >> 1); + return; + case offset12scale4: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x3) == 0) && + "scaled imm12 not accessing 4-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 2); + return; + case offset12scale8: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x7) == 0) && + "scaled imm12 not accessing 8-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 3); + return; + case offset12scale16: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0xF) == 0) && + "scaled imm12 not accessing 16-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 4); + return; + case addOffset12: + instruction = *loc32; + assert(((instruction & 0xFFC00000) == 0xF9400000) && + "GOT reloc is not an LDR instruction"); + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + value32 = 0x91000000 | (instruction & 0x000003FF); + instruction = setImm12(value32, displacement); + *loc32 = instruction; + return; + case pointer64: + case pointer64ToGOT: + *loc64 = targetAddress + ref.addend(); + return; + case delta64: + case unwindFDEToFunction: + *loc64 = (targetAddress - fixupAddress) + ref.addend(); + return; + case delta32: + case delta32ToGOT: + case unwindCIEToPersonalityFunction: + *loc32 = (targetAddress - fixupAddress) + ref.addend(); + return; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case lazyPointer: + // Do nothing + return; + case lazyImmediateLocation: + *loc32 = ref.addend(); + return; + case imageOffset: 
+ *loc32 = (targetAddress - imageBaseAddress) + ref.addend(); + return; + case imageOffsetGot: + llvm_unreachable("imageOffsetGot should have been changed to imageOffset"); + break; + case unwindInfoToEhFrame: + value64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + assert(value64 < 0xffffffU && "offset in __eh_frame too large"); + *loc32 = (*loc32 & 0xff000000U) | value64; + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("invalid arm64 Reference Kind"); +} + +void ArchHandler_arm64::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool targetUnnamed) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + switch (static_cast<Arm64Kind>(ref.kindValue())) { + case branch26: + *loc32 = setDisplacementInBranch26(*loc32, 0); + return; + case page21: + case gotPage21: + case tlvPage21: + *loc32 = setDisplacementInADRP(*loc32, 0); + return; + case offset12: + case offset12scale2: + case offset12scale4: + case offset12scale8: + case offset12scale16: + case gotOffset12: + case tlvOffset12: + *loc32 = setImm12(*loc32, 0); + return; + case pointer64: + if (targetUnnamed) + *loc64 = targetAddress + ref.addend(); + else + *loc64 = ref.addend(); + return; + case delta64: + *loc64 = ref.addend() + inAtomAddress - fixupAddress; + return; + case unwindFDEToFunction: + // We don't emit unwindFDEToFunction in -r mode as they are implicitly + // generated from the data in the __eh_frame section. So here we need + // to use the targetAddress so that we can generate the full relocation + // when we parse again later. 
+ *loc64 = targetAddress - fixupAddress; + return; + case delta32: + *loc32 = ref.addend() + inAtomAddress - fixupAddress; + return; + case negDelta32: + // We don't emit negDelta32 in -r mode as they are implicitly + // generated from the data in the __eh_frame section. So here we need + // to use the targetAddress so that we can generate the full relocation + // when we parse again later. + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case pointer64ToGOT: + *loc64 = 0; + return; + case delta32ToGOT: + *loc32 = inAtomAddress - fixupAddress; + return; + case unwindCIEToPersonalityFunction: + // We don't emit unwindCIEToPersonalityFunction in -r mode as they are + // implicitly generated from the data in the __eh_frame section. So here we + // need to use the targetAddress so that we can generate the full relocation + // when we parse again later. + *loc32 = targetAddress - fixupAddress; + return; + case addOffset12: + llvm_unreachable("lazy reference kind implies GOT pass was run"); + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + case imageOffset: + case imageOffsetGot: + case unwindInfoToEhFrame: + llvm_unreachable("fixup implies __unwind_info"); + return; + case invalid: + // Fall into llvm_unreachable(). 
+ break; + } + llvm_unreachable("unknown arm64 Reference Kind"); +} + +void ArchHandler_arm64::appendSectionRelocations( + const DefinedAtom &atom, uint64_t atomSectionOffset, const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + switch (static_cast<Arm64Kind>(ref.kindValue())) { + case branch26: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); + } + return; + case page21: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); + } + return; + case offset12: + case offset12scale2: + case offset12scale4: + case offset12scale8: + case offset12scale16: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); + } + return; + case gotPage21: + assert(ref.addend() == 0); + appendReloc(relocs, 
sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4); + return; + case gotOffset12: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4); + return; + case tlvPage21: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4); + return; + case tlvOffset12: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4); + return; + case pointer64: + if (ref.target()->name().empty()) + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rLength8); + else + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case delta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + ARM64_RELOC_SUBTRACTOR | rExtern | rLength8); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case delta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + ARM64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case pointer64ToGOT: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8); + return; + case delta32ToGOT: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4); + return; + case addOffset12: + llvm_unreachable("lazy reference kind implies GOT pass was 
run"); + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + case imageOffset: + case imageOffsetGot: + llvm_unreachable("deltas from mach_header can only be in final images"); + case unwindCIEToPersonalityFunction: + case unwindFDEToFunction: + case unwindInfoToEhFrame: + case negDelta32: + // Do nothing. + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("unknown arm64 Reference Kind"); +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_arm64() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_arm64()); +} + +} // namespace mach_o +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp new file mode 100644 index 000000000000..c940ea542ee4 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp @@ -0,0 +1,647 @@ +//===- lib/FileFormat/MachO/ArchHandler_x86.cpp ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle16_t; +using llvm::support::ulittle32_t; + +using llvm::support::little16_t; +using llvm::support::little32_t; + +class ArchHandler_x86 : public ArchHandler { +public: + ArchHandler_x86() = default; + ~ArchHandler_x86() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { return Reference::KindArch::x86; } + + const StubInfo &stubInfo() override { return _sStubInfo; } + bool isCallSite(const Reference &) override; + bool isNonCallBranch(const Reference &) override { + return false; + } + + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + + bool needsCompactUnwind() override { + return false; + } + + Reference::KindValue imageOffsetKind() override { + return invalid; + } + + Reference::KindValue imageOffsetKindIndirect() override { + return invalid; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return invalid; + } + + Reference::KindValue unwindRefToCIEKind() override { + return negDelta32; + } + + Reference::KindValue unwindRefToFunctionKind() override{ + return delta32; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return invalid; + } + + Reference::KindValue pointerKind() override { + return invalid; + } + + uint32_t dwarfCompactUnwindType() override { + return 0x04000000U; + } + + llvm::Error getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom 
*inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) override; + + bool isDataInCodeTransition(Reference::KindValue refKind) override { + switch (refKind) { + case modeCode: + case modeData: + return true; + default: + return false; + break; + } + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return modeCode; + } + +private: + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfo; + + enum X86Kind : Reference::KindValue { + invalid, /// for error condition + + modeCode, /// Content starting at this offset is code. + modeData, /// Content starting at this offset is data. 
+ + // Kinds found in mach-o .o files: + branch32, /// ex: call _foo + branch16, /// ex: callw _foo + abs32, /// ex: movl _foo, %eax + funcRel32, /// ex: movl _foo-L1(%eax), %eax + pointer32, /// ex: .long _foo + delta32, /// ex: .long _foo - . + negDelta32, /// ex: .long . - _foo + + // Kinds introduced by Passes: + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + }; + + static bool useExternalRelocationTo(const Atom &target); + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress); +}; + +//===----------------------------------------------------------------------===// +// ArchHandler_x86 +//===----------------------------------------------------------------------===// + +const Registry::KindStrings ArchHandler_x86::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(modeCode), + LLD_KIND_STRING_ENTRY(modeData), + LLD_KIND_STRING_ENTRY(branch32), + LLD_KIND_STRING_ENTRY(branch16), + LLD_KIND_STRING_ENTRY(abs32), + LLD_KIND_STRING_ENTRY(funcRel32), + LLD_KIND_STRING_ENTRY(pointer32), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(negDelta32), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_x86::_sStubInfo = { + "dyld_stub_binder", + + // Lazy pointer references + { Reference::KindArch::x86, pointer32, 0, 0 }, + { Reference::KindArch::x86, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::x86, pointer32, 0, 0 }, + + // x86 code alignment + 1, + + // Stub size and code + 6, + { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp *lazyPointer + { Reference::KindArch::x86, abs32, 2, 0 }, + 
{ false, 0, 0, 0 }, + + // Stub Helper size and code + 10, + { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $lazy-info-offset + 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper + { Reference::KindArch::x86, lazyImmediateLocation, 1, 0 }, + { Reference::KindArch::x86, branch32, 6, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeNonLazyPointer, + + // Stub Helper-Common size and code + 12, + // Stub helper alignment + 2, + { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $dyld_ImageLoaderCache + 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *_fast_lazy_bind + 0x90 }, // nop + { Reference::KindArch::x86, abs32, 1, 0 }, + { false, 0, 0, 0 }, + { Reference::KindArch::x86, abs32, 7, 0 }, + { false, 0, 0, 0 } +}; + +bool ArchHandler_x86::isCallSite(const Reference &ref) { + return (ref.kindValue() == branch32); +} + +bool ArchHandler_x86::isPointer(const Reference &ref) { + return (ref.kindValue() == pointer32); +} + +bool ArchHandler_x86::isPairedReloc(const Relocation &reloc) { + if (!reloc.scattered) + return false; + return (reloc.type == GENERIC_RELOC_LOCAL_SECTDIFF) || + (reloc.type == GENERIC_RELOC_SECTDIFF); +} + +llvm::Error +ArchHandler_x86::getReferenceInfo(const Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + DefinedAtom::ContentPermissions perms; + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + switch (relocPattern(reloc)) { + case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength4: + // ex: call _foo (and _foo undefined) + *kind = branch32; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = fixupAddress + 4 + (int32_t)*(const little32_t *)fixupContent; + break; + case GENERIC_RELOC_VANILLA | rPcRel | rLength4: + // ex: call _foo 
(and _foo defined) + *kind = branch32; + targetAddress = + fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + break; + case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4: + // ex: call _foo+n (and _foo defined) + *kind = branch32; + targetAddress = + fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend = targetAddress - reloc.value; + break; + case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength2: + // ex: callw _foo (and _foo undefined) + *kind = branch16; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = fixupAddress + 2 + (int16_t)*(const little16_t *)fixupContent; + break; + case GENERIC_RELOC_VANILLA | rPcRel | rLength2: + // ex: callw _foo (and _foo defined) + *kind = branch16; + targetAddress = + fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + break; + case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2: + // ex: callw _foo+n (and _foo defined) + *kind = branch16; + targetAddress = + fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend = targetAddress - reloc.value; + break; + case GENERIC_RELOC_VANILLA | rExtern | rLength4: + // ex: movl _foo, %eax (and _foo undefined) + // ex: .long _foo (and _foo undefined) + perms = inAtom->permissions(); + *kind = + ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? 
abs32 + : pointer32; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const ulittle32_t *)fixupContent; + break; + case GENERIC_RELOC_VANILLA | rLength4: + // ex: movl _foo, %eax (and _foo defined) + // ex: .long _foo (and _foo defined) + perms = inAtom->permissions(); + *kind = + ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32 + : pointer32; + targetAddress = *(const ulittle32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + break; + case GENERIC_RELOC_VANILLA | rScattered | rLength4: + // ex: .long _foo+n (and _foo defined) + perms = inAtom->permissions(); + *kind = + ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32 + : pointer32; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend = *(const ulittle32_t *)fixupContent - reloc.value; + break; + default: + return llvm::make_error<GenericError>("unsupported i386 relocation type"); + } + return llvm::Error::success(); +} + +llvm::Error +ArchHandler_x86::getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + bool scatterable, + FindAtomBySectionAndAddress atomFromAddr, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + DefinedAtom::ContentPermissions perms = inAtom->permissions(); + uint32_t fromAddress; + uint32_t toAddress; + uint32_t value; + const lld::Atom *fromTarget; + Reference::Addend offsetInTo; + Reference::Addend offsetInFrom; + switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((GENERIC_RELOC_SECTDIFF | rScattered | rLength4) << 16 | + GENERIC_RELOC_PAIR | rScattered | rLength4): + case ((GENERIC_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 | + 
GENERIC_RELOC_PAIR | rScattered | rLength4): + toAddress = reloc1.value; + fromAddress = reloc2.value; + value = *(const little32_t *)fixupContent; + if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) + return ec; + if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) + return ec; + if (fromTarget != inAtom) { + if (*target != inAtom) + return llvm::make_error<GenericError>( + "SECTDIFF relocation where neither target is in atom"); + *kind = negDelta32; + *addend = toAddress - value - fromAddress; + *target = fromTarget; + } else { + if ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) { + // SECTDIFF relocations are used in i386 codegen where the function + // prolog does a CALL to the next instruction which POPs the return + // address into EBX which becomes the pic-base register. The POP + // instruction is label the used for the subtrahend in expressions. + // The funcRel32 kind represents the 32-bit delta to some symbol from + // the start of the function (atom) containing the funcRel32. + *kind = funcRel32; + uint32_t ta = fromAddress + value - toAddress; + *addend = ta - offsetInFrom; + } else { + *kind = delta32; + *addend = fromAddress + value - toAddress; + } + } + return llvm::Error::success(); + break; + default: + return llvm::make_error<GenericError>("unsupported i386 relocation type"); + } +} + +void ArchHandler_x86::generateAtomContent(const DefinedAtom &atom, + bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. 
+ for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + uint64_t targetAddress = 0; + if (isa<DefinedAtom>(target)) + targetAddress = findAddress(*target); + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress); + } + } +} + +void ArchHandler_x86::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + switch (static_cast<X86Kind>(ref.kindValue())) { + case branch32: + *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend(); + break; + case branch16: + *loc32 = (targetAddress - (fixupAddress + 2)) + ref.addend(); + break; + case pointer32: + case abs32: + *loc32 = targetAddress + ref.addend(); + break; + case funcRel32: + *loc32 = targetAddress - inAtomAddress + ref.addend(); + break; + case delta32: + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + break; + case modeCode: + case modeData: + case lazyPointer: + // do nothing + break; + case lazyImmediateLocation: + *loc32 = ref.addend(); + break; + case invalid: + llvm_unreachable("invalid x86 Reference Kind"); + break; + } +} + +void ArchHandler_x86::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86); + bool 
useExternalReloc = useExternalRelocationTo(*ref.target()); + ulittle16_t *loc16 = reinterpret_cast<ulittle16_t *>(loc); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + switch (static_cast<X86Kind>(ref.kindValue())) { + case branch32: + if (useExternalReloc) + *loc32 = ref.addend() - (fixupAddress + 4); + else + *loc32 =(targetAddress - (fixupAddress+4)) + ref.addend(); + break; + case branch16: + if (useExternalReloc) + *loc16 = ref.addend() - (fixupAddress + 2); + else + *loc16 = (targetAddress - (fixupAddress+2)) + ref.addend(); + break; + case pointer32: + case abs32: + *loc32 = targetAddress + ref.addend(); + break; + case funcRel32: + *loc32 = targetAddress - inAtomAddress + ref.addend(); // FIXME + break; + case delta32: + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + break; + case modeCode: + case modeData: + case lazyPointer: + case lazyImmediateLocation: + // do nothing + break; + case invalid: + llvm_unreachable("invalid x86 Reference Kind"); + break; + } +} + +bool ArchHandler_x86::useExternalRelocationTo(const Atom &target) { + // Undefined symbols are referenced via external relocations. + if (isa<UndefinedAtom>(&target)) + return true; + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(&target)) { + switch (defAtom->merge()) { + case DefinedAtom::mergeAsTentative: + // Tentative definitions are referenced via external relocations. + return true; + case DefinedAtom::mergeAsWeak: + case DefinedAtom::mergeAsWeakAndAddressUsed: + // Global weak-defs are referenced via external relocations. + return (defAtom->scope() == DefinedAtom::scopeGlobal); + default: + break; + } + } + // Everything else is reference via an internal relocation. 
+ return false; +} + +void ArchHandler_x86::appendSectionRelocations( + const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + bool useExternalReloc = useExternalRelocationTo(*ref.target()); + switch (static_cast<X86Kind>(ref.kindValue())) { + case modeCode: + case modeData: + break; + case branch32: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength4); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + GENERIC_RELOC_VANILLA | rPcRel | rLength4); + } + break; + case branch16: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength2); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + GENERIC_RELOC_VANILLA | rPcRel | rLength2); + } + break; + case pointer32: + case abs32: + if (useExternalReloc) + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rExtern | rLength4); + else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_VANILLA | rScattered | rLength4); + else + appendReloc(relocs, 
sectionOffset, sectionIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rLength4); + } + break; + case funcRel32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) - ref.addend(), + GENERIC_RELOC_PAIR | rScattered | rLength4); + break; + case delta32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + + ref.offsetInAtom(), + GENERIC_RELOC_PAIR | rScattered | rLength4); + break; + case negDelta32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + + ref.offsetInAtom(), + GENERIC_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_PAIR | rScattered | rLength4); + break; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + break; + case invalid: + llvm_unreachable("unknown x86 Reference Kind"); + break; + } +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_x86() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_x86()); +} + +} // namespace mach_o +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp new file mode 100644 index 000000000000..d687ca5de5b4 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp @@ -0,0 +1,861 @@ +//===- lib/FileFormat/MachO/ArchHandler_x86_64.cpp ------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::ulittle64_t; + +using llvm::support::little32_t; +using llvm::support::little64_t; + +class ArchHandler_x86_64 : public ArchHandler { +public: + ArchHandler_x86_64() = default; + ~ArchHandler_x86_64() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { + return Reference::KindArch::x86_64; + } + + /// Used by GOTPass to locate GOT References + bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::x86_64); + switch (ref.kindValue()) { + case ripRel32GotLoad: + canBypassGOT = true; + return true; + case ripRel32Got: + canBypassGOT = false; + return true; + case imageOffsetGot: + canBypassGOT = false; + return true; + default: + return false; + } + } + + bool isTLVAccess(const Reference &ref) const override { + assert(ref.kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref.kindArch() == Reference::KindArch::x86_64); + return ref.kindValue() == ripRel32Tlv; + } + + void updateReferenceToTLV(const Reference *ref) override { + assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref->kindArch() == Reference::KindArch::x86_64); + assert(ref->kindValue() == ripRel32Tlv); + const_cast<Reference*>(ref)->setKindValue(ripRel32); + } + + /// Used by GOTPass to update GOT References + void 
updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override { + assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref->kindArch() == Reference::KindArch::x86_64); + + switch (ref->kindValue()) { + case ripRel32Got: + assert(targetNowGOT && "target must be GOT"); + // ripRel32Got only adds the assert above, then shares the rewrite below. + // fall through + case ripRel32GotLoad: + const_cast<Reference *>(ref) + ->setKindValue(targetNowGOT ? ripRel32 : ripRel32GotLoadNowLea); + break; + case imageOffsetGot: + const_cast<Reference *>(ref)->setKindValue(imageOffset); + break; + default: + llvm_unreachable("unknown GOT reference kind"); + } + } + + bool needsCompactUnwind() override { + return true; + } + + Reference::KindValue imageOffsetKind() override { + return imageOffset; + } + + Reference::KindValue imageOffsetKindIndirect() override { + return imageOffsetGot; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return ripRel32Got; + } + + Reference::KindValue unwindRefToCIEKind() override { + return negDelta32; + } + + Reference::KindValue unwindRefToFunctionKind() override{ + return unwindFDEToFunction; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return unwindInfoToEhFrame; + } + + Reference::KindValue pointerKind() override { + return pointer64; + } + + uint32_t dwarfCompactUnwindType() override { + return 0x04000000U; + } + + const StubInfo &stubInfo() override { return _sStubInfo; } + + bool isNonCallBranch(const Reference &) override { + return false; + } + + bool isCallSite(const Reference &) override; + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + + llvm::Error getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error 
+ getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override { + return (atom->contentType() == DefinedAtom::typeCString); + } + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBase, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) override; + +private: + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfo; + + enum X86_64Kind: Reference::KindValue { + invalid, /// for error condition + + // Kinds found in mach-o .o files: + branch32, /// ex: call _foo + ripRel32, /// ex: movq _foo(%rip), %rax + ripRel32Minus1, /// ex: movb $0x12, _foo(%rip) + ripRel32Minus2, /// ex: movw $0x1234, _foo(%rip) + ripRel32Minus4, /// ex: movl $0x12345678, _foo(%rip) + ripRel32Anon, /// ex: movq L1(%rip), %rax + ripRel32Minus1Anon, /// ex: movb $0x12, L1(%rip) + ripRel32Minus2Anon, /// ex: movw $0x1234, L1(%rip) + ripRel32Minus4Anon, /// ex: movl $0x12345678, L1(%rip) + ripRel32GotLoad, /// ex: movq _foo@GOTPCREL(%rip), %rax + ripRel32Got, /// ex: pushq _foo@GOTPCREL(%rip) + ripRel32Tlv, /// ex: movq _foo@TLVP(%rip), %rdi + pointer64, /// ex: .quad _foo + pointer64Anon, /// ex: .quad L1 + delta64, /// ex: .quad _foo - . 
+ delta32, /// ex: .long _foo - . + delta64Anon, /// ex: .quad L1 - . + delta32Anon, /// ex: .long L1 - . + negDelta64, /// ex: .quad . - _foo + negDelta32, /// ex: .long . - _foo + + // Kinds introduced by Passes: + ripRel32GotLoadNowLea, /// Target of GOT load is in linkage unit so + /// "movq _foo@GOTPCREL(%rip), %rax" can be changed + /// to "leaq _foo(%rip), %rax". + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + + imageOffset, /// Location contains offset of atom in final image + imageOffsetGot, /// Location contains offset of GOT entry for atom in + /// final image (typically personality function). + unwindFDEToFunction, /// Nearly delta64, but cannot be rematerialized in + /// relocatable object (yay for implicit contracts!). + unwindInfoToEhFrame, /// Fix low 24 bits of compact unwind encoding to + /// refer to __eh_frame entry. + tlvInitSectionOffset /// Location contains offset of tlv init-value atom + /// within the __thread_data section. 
+ }; + + /// Classifies \p reloc by its relocPattern; yields `invalid` for patterns + /// this handler does not recognize. + Reference::KindValue kindFromReloc(const normalized::Relocation &reloc); + + /// Writes the resolved value for \p ref at \p location when generating + /// final (non-relocatable) atom content. + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress); + + /// Writes the value for \p ref at \p location when generating relocatable + /// atom content. + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress); +}; + +/// One name entry per X86_64Kind enumerator (listed in a different order than +/// the enum), terminated by LLD_KIND_STRING_END. +const Registry::KindStrings ArchHandler_x86_64::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), LLD_KIND_STRING_ENTRY(branch32), + LLD_KIND_STRING_ENTRY(ripRel32), LLD_KIND_STRING_ENTRY(ripRel32Minus1), + LLD_KIND_STRING_ENTRY(ripRel32Minus2), LLD_KIND_STRING_ENTRY(ripRel32Minus4), + LLD_KIND_STRING_ENTRY(ripRel32Anon), + LLD_KIND_STRING_ENTRY(ripRel32Minus1Anon), + LLD_KIND_STRING_ENTRY(ripRel32Minus2Anon), + LLD_KIND_STRING_ENTRY(ripRel32Minus4Anon), + LLD_KIND_STRING_ENTRY(ripRel32GotLoad), + LLD_KIND_STRING_ENTRY(ripRel32GotLoadNowLea), + LLD_KIND_STRING_ENTRY(ripRel32Got), LLD_KIND_STRING_ENTRY(ripRel32Tlv), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_ENTRY(pointer64), LLD_KIND_STRING_ENTRY(pointer64Anon), + LLD_KIND_STRING_ENTRY(delta32), LLD_KIND_STRING_ENTRY(delta64), + LLD_KIND_STRING_ENTRY(delta32Anon), LLD_KIND_STRING_ENTRY(delta64Anon), + LLD_KIND_STRING_ENTRY(negDelta64), + LLD_KIND_STRING_ENTRY(negDelta32), + LLD_KIND_STRING_ENTRY(imageOffset), LLD_KIND_STRING_ENTRY(imageOffsetGot), + LLD_KIND_STRING_ENTRY(unwindFDEToFunction), + LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame), + LLD_KIND_STRING_ENTRY(tlvInitSectionOffset), + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_x86_64::_sStubInfo = { + "dyld_stub_binder", + + // Lazy pointer references + { Reference::KindArch::x86_64, pointer64, 0, 0 }, + { Reference::KindArch::x86_64, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::x86_64, pointer64, 
0, 0 }, + + // x86_64 code alignment 2^1 + 1, + + // Stub size and code + 6, + { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp *lazyPointer + { Reference::KindArch::x86_64, ripRel32, 2, 0 }, + { false, 0, 0, 0 }, + + // Stub Helper size and code + 10, + { 0x68, 0x00, 0x00, 0x00, 0x00, // pushq $lazy-info-offset + 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper + { Reference::KindArch::x86_64, lazyImmediateLocation, 1, 0 }, + { Reference::KindArch::x86_64, branch32, 6, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeNonLazyPointer, + + // Stub Helper-Common size and code + 16, + // Stub helper alignment + 2, + { 0x4C, 0x8D, 0x1D, 0x00, 0x00, 0x00, 0x00, // leaq cache(%rip),%r11 + 0x41, 0x53, // push %r11 + 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *binder(%rip) + 0x90 }, // nop + { Reference::KindArch::x86_64, ripRel32, 3, 0 }, + { false, 0, 0, 0 }, + { Reference::KindArch::x86_64, ripRel32, 11, 0 }, + { false, 0, 0, 0 } + +}; + +/// A reference is a call site only if it is a mach_o branch32 reference. +bool ArchHandler_x86_64::isCallSite(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::x86_64); + return (ref.kindValue() == branch32); +} + +/// True for mach_o pointer64 and pointer64Anon references. +bool ArchHandler_x86_64::isPointer(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::x86_64); + Reference::KindValue kind = ref.kindValue(); + return (kind == pointer64 || kind == pointer64Anon); +} + +/// X86_64_RELOC_SUBTRACTOR is the only relocation type treated as the first +/// half of a pair. +bool ArchHandler_x86_64::isPairedReloc(const Relocation &reloc) { + return (reloc.type == X86_64_RELOC_SUBTRACTOR); +} + +/// Maps the relocation pattern (relocation type OR'ed with the rPcRel, +/// rExtern and rLength flags) to an X86_64Kind. Patterns without rExtern map +/// to the *Anon kinds; unmatched patterns yield `invalid`. +Reference::KindValue +ArchHandler_x86_64::kindFromReloc(const Relocation &reloc) { + switch(relocPattern(reloc)) { + case X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4: + return branch32; + case X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4: + return ripRel32; + case X86_64_RELOC_SIGNED | rPcRel | rLength4: + return ripRel32Anon; + case 
X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4: + return ripRel32Minus1; + case X86_64_RELOC_SIGNED_1 | rPcRel | rLength4: + return ripRel32Minus1Anon; + case X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4: + return ripRel32Minus2; + case X86_64_RELOC_SIGNED_2 | rPcRel | rLength4: + return ripRel32Minus2Anon; + case X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4: + return ripRel32Minus4; + case X86_64_RELOC_SIGNED_4 | rPcRel | rLength4: + return ripRel32Minus4Anon; + case X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4: + return ripRel32GotLoad; + case X86_64_RELOC_GOT | rPcRel | rExtern | rLength4: + return ripRel32Got; + case X86_64_RELOC_TLV | rPcRel | rExtern | rLength4: + return ripRel32Tlv; + case X86_64_RELOC_UNSIGNED | rExtern | rLength8: + return pointer64; + case X86_64_RELOC_UNSIGNED | rLength8: + return pointer64Anon; + default: + return invalid; + } +} + +/// Decodes a single (unpaired) relocation into a reference kind, target atom +/// and addend. Symbol-based kinds look the target up with atomFromSymbolIndex +/// and derive the addend from the fixup bytes; Anon kinds compute a target +/// address and let atomFromAddress produce both target and addend. +/// Returns an "unknown type" error when the relocation pattern is not +/// recognized. \p swap is not used in this function. +llvm::Error +ArchHandler_x86_64::getReferenceInfo(const Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + *kind = kindFromReloc(reloc); + if (*kind == invalid) + return llvm::make_error<GenericError>("unknown type"); + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + switch (*kind) { + case branch32: + case ripRel32: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little32_t *)fixupContent; + return llvm::Error::success(); + case ripRel32Minus1: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + 1; + return llvm::Error::success(); + case ripRel32Minus2: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + 
2; + return llvm::Error::success(); + case ripRel32Minus4: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + 4; + return llvm::Error::success(); + case ripRel32Anon: + targetAddress = fixupAddress + 4 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32Minus1Anon: + targetAddress = fixupAddress + 5 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32Minus2Anon: + targetAddress = fixupAddress + 6 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32Minus4Anon: + targetAddress = fixupAddress + 8 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32GotLoad: + case ripRel32Got: + case ripRel32Tlv: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little32_t *)fixupContent; + return llvm::Error::success(); + case tlvInitSectionOffset: + case pointer64: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + // If this is the 3rd pointer of a tlv-thunk (i.e. the pointer to the TLV's + // initial value) we need to handle it specially. + // The tlvInitSectionOffset case label above is never selected by the + // switch: kindFromReloc does not produce that kind, it is only assigned + // below. + // NOTE(review): on the tlv-thunk path *addend is not assigned here; the + // assert reads the value the caller passed in -- confirm callers + // pre-initialize the addend to 0. 
+ if (inAtom->contentType() == DefinedAtom::typeThunkTLV && + offsetInAtom == 16) { + *kind = tlvInitSectionOffset; + assert(*addend == 0 && "TLV-init has non-zero addend?"); + } else + *addend = *(const little64_t *)fixupContent; + return llvm::Error::success(); + case pointer64Anon: + targetAddress = *(const little64_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + default: + llvm_unreachable("bad reloc kind"); + } +} + +/// Decodes a SUBTRACTOR/UNSIGNED relocation pair into a delta reference. +/// reloc1 names the subtracted atom (fromTarget) and reloc2 the added one. +/// When the fixup's own atom is fromTarget the result is a delta kind; when it +/// is the reloc2 target the result is a negDelta kind whose target is swapped +/// to fromTarget; any other combination is rejected with an error. +/// \p fixupAddress, \p swap and \p scatterable are not used in this function. +llvm::Error +ArchHandler_x86_64::getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + const lld::Atom *fromTarget; + if (auto ec = atomFromSymbolIndex(reloc1.symbol, &fromTarget)) + return ec; + + switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + X86_64_RELOC_UNSIGNED | rExtern | rLength8): { + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + uint64_t encodedAddend = (int64_t)*(const little64_t *)fixupContent; + if (inAtom == fromTarget) { + if (inAtom->contentType() == DefinedAtom::typeCFI) + *kind = unwindFDEToFunction; + else + *kind = delta64; + *addend = encodedAddend + offsetInAtom; + } else if (inAtom == *target) { + *kind = negDelta64; + *addend = encodedAddend - offsetInAtom; + *target = fromTarget; + } else + return llvm::make_error<GenericError>("Invalid pointer diff"); + return llvm::Error::success(); + } + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + X86_64_RELOC_UNSIGNED | rExtern | rLength4): { + if (auto ec = atomFromSymbolIndex(reloc2.symbol, 
target)) + return ec; + uint32_t encodedAddend = (int32_t)*(const little32_t *)fixupContent; + if (inAtom == fromTarget) { + *kind = delta32; + *addend = encodedAddend + offsetInAtom; + } else if (inAtom == *target) { + *kind = negDelta32; + *addend = encodedAddend - offsetInAtom; + *target = fromTarget; + } else + return llvm::make_error<GenericError>("Invalid pointer diff"); + return llvm::Error::success(); + } + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + X86_64_RELOC_UNSIGNED | rLength8): + if (fromTarget != inAtom) + return llvm::make_error<GenericError>("pointer diff not in base atom"); + *kind = delta64Anon; + targetAddress = offsetInAtom + (int64_t)*(const little64_t *)fixupContent; + return atomFromAddress(reloc2.symbol, targetAddress, target, addend); + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + X86_64_RELOC_UNSIGNED | rLength4): + if (fromTarget != inAtom) + return llvm::make_error<GenericError>("pointer diff not in base atom"); + *kind = delta32Anon; + targetAddress = offsetInAtom + (int32_t)*(const little32_t *)fixupContent; + return atomFromAddress(reloc2.symbol, targetAddress, target, addend); + default: + return llvm::make_error<GenericError>("unknown pair"); + } +} + +void ArchHandler_x86_64::generateAtomContent( + const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. + // Each reference is patched in place at its offsetInAtom. Targets that are + // not DefinedAtoms are given a targetAddress of 0. \p relocatable selects + // between applyFixupRelocatable and applyFixupFinal. 
+ for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + uint64_t targetAddress = 0; + if (isa<DefinedAtom>(target)) + targetAddress = findAddress(*target); + // NOTE(review): findAddress(atom) does not depend on ref, so this lookup + // is repeated for every reference of the atom. + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress, imageBaseAddress, findSectionAddress); + } + } +} + +/// Patches one reference for final (non-relocatable) output by storing a 32- +/// or 64-bit little-endian value at \p loc. References outside the mach_o +/// namespace are ignored. \p inAtomAddress is not used in this function. +void ArchHandler_x86_64::applyFixupFinal( + const Reference &ref, uint8_t *loc, uint64_t fixupAddress, + uint64_t targetAddress, uint64_t inAtomAddress, uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + switch (static_cast<X86_64Kind>(ref.kindValue())) { + case branch32: + case ripRel32: + case ripRel32Anon: + case ripRel32Got: + case ripRel32GotLoad: + case ripRel32Tlv: + *loc32 = targetAddress - (fixupAddress + 4) + ref.addend(); + return; + case pointer64: + case pointer64Anon: + *loc64 = targetAddress + ref.addend(); + return; + case tlvInitSectionOffset: + *loc64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + return; + case ripRel32Minus1: + case ripRel32Minus1Anon: + *loc32 = targetAddress - (fixupAddress + 5) + ref.addend(); + return; + case ripRel32Minus2: + case ripRel32Minus2Anon: + *loc32 = targetAddress - (fixupAddress + 6) + ref.addend(); + return; + case ripRel32Minus4: + case ripRel32Minus4Anon: + *loc32 = targetAddress - (fixupAddress + 8) + ref.addend(); + return; + case delta32: + case delta32Anon: + *loc32 = targetAddress - fixupAddress + ref.addend(); + return; + 
case delta64: + case delta64Anon: + case unwindFDEToFunction: + *loc64 = targetAddress - fixupAddress + ref.addend(); + return; + case ripRel32GotLoadNowLea: + // Change MOVQ to LEA + // by patching the opcode byte two bytes before the fixup (0x8B -> 0x8D). + assert(loc[-2] == 0x8B); + loc[-2] = 0x8D; + *loc32 = targetAddress - (fixupAddress + 4) + ref.addend(); + return; + case negDelta64: + *loc64 = fixupAddress - targetAddress + ref.addend(); + return; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case lazyPointer: + // Do nothing + return; + case lazyImmediateLocation: + *loc32 = ref.addend(); + return; + case imageOffset: + case imageOffsetGot: + *loc32 = (targetAddress - imageBaseAddress) + ref.addend(); + return; + case unwindInfoToEhFrame: { + // Only the low 24 bits are replaced; the top 8 bits of the existing word + // are preserved. + uint64_t val = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + assert(val < 0xffffffU && "offset in __eh_frame too large"); + *loc32 = (*loc32 & 0xff000000U) | val; + return; + } + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("invalid x86_64 Reference Kind"); +} + +/// Patches one reference for relocatable output. Symbol-based kinds store only +/// the addend (reduced by 1/2/4 for the Minus kinds); Anon kinds store a value +/// computed from the target address. References outside the mach_o namespace +/// are ignored. +void ArchHandler_x86_64::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + switch (static_cast<X86_64Kind>(ref.kindValue())) { + case branch32: + case ripRel32: + case ripRel32Got: + case ripRel32GotLoad: + case ripRel32Tlv: + *loc32 = ref.addend(); + return; + case ripRel32Anon: + *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend(); + return; + case tlvInitSectionOffset: + case pointer64: + *loc64 = ref.addend(); + return; + case pointer64Anon: + *loc64 = targetAddress + ref.addend(); + return; + case ripRel32Minus1: + *loc32 = ref.addend() - 1; + return; + case ripRel32Minus1Anon: + *loc32 
= (targetAddress - (fixupAddress + 5)) + ref.addend(); + return; + case ripRel32Minus2: + *loc32 = ref.addend() - 2; + return; + case ripRel32Minus2Anon: + *loc32 = (targetAddress - (fixupAddress + 6)) + ref.addend(); + return; + case ripRel32Minus4: + *loc32 = ref.addend() - 4; + return; + case ripRel32Minus4Anon: + *loc32 = (targetAddress - (fixupAddress + 8)) + ref.addend(); + return; + case delta32: + *loc32 = ref.addend() + inAtomAddress - fixupAddress; + return; + case delta32Anon: + // The value we write here should be the delta to the target + // after taking into account the difference from the fixup back to the + // last defined label. + // i.e., if we have: + // _base: ... + // Lfixup: .long Ltarget - . + // ... + // Ltarget: + // + // Then we want to encode the value (Ltarget + addend) - (Lfixup - _base) + *loc32 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress); + return; + case delta64: + *loc64 = ref.addend() + inAtomAddress - fixupAddress; + return; + case delta64Anon: + // The value we write here should be the delta to the target + // after taking into account the difference from the fixup back to the + // last defined label. + // i.e., if we have: + // _base: ... + // Lfixup: .quad Ltarget - . + // ... 
+ // Ltarget: + // + // Then we want to encode the value (Ltarget + addend) - (Lfixup - _base) + *loc64 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress); + return; + case negDelta64: + *loc64 = ref.addend() + fixupAddress - inAtomAddress; + return; + case negDelta32: + *loc32 = ref.addend() + fixupAddress - inAtomAddress; + return; + case ripRel32GotLoadNowLea: + llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run"); + return; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + return; + case imageOffset: + case imageOffsetGot: + case unwindInfoToEhFrame: + llvm_unreachable("fixup implies __unwind_info"); + return; + case unwindFDEToFunction: + // Do nothing for now + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("unknown x86_64 Reference Kind"); +} + +/// Appends the section relocation(s) that describe \p ref to \p relocs. +/// Symbol-based kinds refer to the target by symbol index with rExtern set; +/// Anon kinds refer to it by section index. Delta kinds emit two relocations. +/// References outside the mach_o namespace are ignored. +/// \p addressForAtom is not used in this function. +void ArchHandler_x86_64::appendSectionRelocations( + const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + switch (static_cast<X86_64Kind>(ref.kindValue())) { + case branch32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4); + return; + case ripRel32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED | rPcRel | rLength4 ); + return; + case ripRel32Got: + appendReloc(relocs, sectionOffset, 
symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_GOT | rPcRel | rExtern | rLength4 ); + return; + case ripRel32GotLoad: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Tlv: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_TLV | rPcRel | rExtern | rLength4 ); + return; + // tlvInitSectionOffset is emitted as the same relocation as pointer64. + case tlvInitSectionOffset: + case pointer64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case pointer64Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength8); + return; + case ripRel32Minus1: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus1Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_1 | rPcRel | rLength4 ); + return; + case ripRel32Minus2: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus2Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_2 | rPcRel | rLength4 ); + return; + case ripRel32Minus4: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus4Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_4 | rPcRel | rLength4 ); + return; + // Deltas are emitted as a pair at the same offset: a SUBTRACTOR on the + // containing atom followed by an UNSIGNED on the target. + case delta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case 
delta32Anon: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength4 ); + return; + case delta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8 ); + return; + case delta64Anon: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength8 ); + return; + // No relocation is emitted for these two kinds. + case unwindFDEToFunction: + case unwindInfoToEhFrame: + return; + // Negative deltas swap the roles: SUBTRACTOR on the target, UNSIGNED on + // the containing atom. + case negDelta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case negDelta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8 ); + return; + case ripRel32GotLoadNowLea: + llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run"); + return; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + return; + case imageOffset: + case imageOffsetGot: + llvm_unreachable("__unwind_info references should have been resolved"); + return; + case invalid: + // Fall into llvm_unreachable(). 
+ break; + } + llvm_unreachable("unknown x86_64 Reference Kind"); +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_x86_64() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_x86_64()); +} + +} // namespace mach_o +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/Atoms.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/Atoms.h new file mode 100644 index 000000000000..573efca9f6f9 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/Atoms.h @@ -0,0 +1,181 @@ +//===- lib/ReaderWriter/MachO/Atoms.h ---------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_ATOMS_H +#define LLD_READER_WRITER_MACHO_ATOMS_H + +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include <cstdint> +#include <string> + +namespace lld { + +class File; + +namespace mach_o { + +/// Defined atom described by a name and a range of content bytes. Both are +/// held as StringRef / ArrayRef views rather than copies, so the storage they +/// refer to must outlive the atom. +class MachODefinedAtom : public SimpleDefinedAtom { +public: + MachODefinedAtom(const File &f, const StringRef name, Scope scope, + ContentType type, Merge merge, bool thumb, bool noDeadStrip, + const ArrayRef<uint8_t> content, Alignment align) + : SimpleDefinedAtom(f), _name(name), _content(content), + _align(align), _contentType(type), _scope(scope), _merge(merge), + _thumb(thumb), _noDeadStrip(noDeadStrip) {} + + // Constructor for zero-fill content + // (null content pointer carrying the requested size; merge is mergeNo and + // thumb is false). + MachODefinedAtom(const File &f, const StringRef name, Scope scope, + ContentType type, uint64_t size, bool noDeadStrip, + Alignment align) + : SimpleDefinedAtom(f), _name(name), + _content(ArrayRef<uint8_t>(nullptr, size)), _align(align), + _contentType(type), _scope(scope), _merge(mergeNo), _thumb(false), + 
_noDeadStrip(noDeadStrip) {} + + ~MachODefinedAtom() override = default; + + uint64_t size() const override { return _content.size(); } + + ContentType contentType() const override { return _contentType; } + + Alignment alignment() const override { return _align; } + + StringRef name() const override { return _name; } + + Scope scope() const override { return _scope; } + + Merge merge() const override { return _merge; } + + DeadStripKind deadStrip() const override { + if (_contentType == DefinedAtom::typeInitializerPtr) + return deadStripNever; + if (_contentType == DefinedAtom::typeTerminatorPtr) + return deadStripNever; + if (_noDeadStrip) + return deadStripNever; + return deadStripNormal; + } + + ArrayRef<uint8_t> rawContent() const override { + // Note: Zerofill atoms have a content pointer which is null. + return _content; + } + + bool isThumb() const { return _thumb; } + +private: + const StringRef _name; + const ArrayRef<uint8_t> _content; + const DefinedAtom::Alignment _align; + const ContentType _contentType; + const Scope _scope; + const Merge _merge; + const bool _thumb; + const bool _noDeadStrip; +}; + +/// MachODefinedAtom that additionally carries a custom section name; +/// sectionChoice() always reports sectionCustomRequired. +class MachODefinedCustomSectionAtom : public MachODefinedAtom { +public: + MachODefinedCustomSectionAtom(const File &f, const StringRef name, + Scope scope, ContentType type, Merge merge, + bool thumb, bool noDeadStrip, + const ArrayRef<uint8_t> content, + StringRef sectionName, Alignment align) + : MachODefinedAtom(f, name, scope, type, merge, thumb, noDeadStrip, + content, align), + _sectionName(sectionName) {} + + ~MachODefinedCustomSectionAtom() override = default; + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionCustomRequired; + } + + StringRef customSectionName() const override { + return _sectionName; + } +private: + StringRef _sectionName; +}; + +/// Tentative definition: reports mergeAsTentative, typeZeroFill and empty raw +/// content. Unlike MachODefinedAtom it keeps its name in a std::string, i.e. +/// it stores its own copy of the name. +class MachOTentativeDefAtom : public SimpleDefinedAtom { +public: + MachOTentativeDefAtom(const File &f, const StringRef name, Scope scope, + uint64_t size, 
DefinedAtom::Alignment align) + : SimpleDefinedAtom(f), _name(name), _scope(scope), _size(size), + _align(align) {} + + ~MachOTentativeDefAtom() override = default; + + uint64_t size() const override { return _size; } + + Merge merge() const override { return DefinedAtom::mergeAsTentative; } + + ContentType contentType() const override { return DefinedAtom::typeZeroFill; } + + Alignment alignment() const override { return _align; } + + StringRef name() const override { return _name; } + + Scope scope() const override { return _scope; } + + ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); } + +private: + const std::string _name; + const Scope _scope; + const uint64_t _size; + const DefinedAtom::Alignment _align; +}; + +class MachOSharedLibraryAtom : public SharedLibraryAtom { +public: + MachOSharedLibraryAtom(const File &file, StringRef name, + StringRef dylibInstallName, bool weakDef) + : SharedLibraryAtom(), _file(file), _name(name), + _dylibInstallName(dylibInstallName) {} + ~MachOSharedLibraryAtom() override = default; + + StringRef loadName() const override { return _dylibInstallName; } + + bool canBeNullAtRuntime() const override { + // FIXME: this may actually be changeable. For now, all symbols are strongly + // defined though. + // NOTE(review): the constructor's weakDef argument is accepted but never + // stored or consulted by this class. + return false; + } + + const File &file() const override { return _file; } + + StringRef name() const override { return _name; } + + Type type() const override { + // Unused in MachO (I think). 
+ return Type::Unknown; + } + + uint64_t size() const override { + // Unused in MachO (I think) + return 0; + } + +private: + const File &_file; + StringRef _name; + StringRef _dylibInstallName; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_ATOMS_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt new file mode 100644 index 000000000000..3b0698525aa5 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt @@ -0,0 +1,34 @@ +add_lld_library(lldMachO + ArchHandler.cpp + ArchHandler_arm.cpp + ArchHandler_arm64.cpp + ArchHandler_x86.cpp + ArchHandler_x86_64.cpp + CompactUnwindPass.cpp + GOTPass.cpp + LayoutPass.cpp + MachOLinkingContext.cpp + MachONormalizedFileBinaryReader.cpp + MachONormalizedFileBinaryWriter.cpp + MachONormalizedFileFromAtoms.cpp + MachONormalizedFileToAtoms.cpp + MachONormalizedFileYAML.cpp + ObjCPass.cpp + ShimPass.cpp + StubsPass.cpp + TLVPass.cpp + WriterMachO.cpp + + LINK_COMPONENTS + DebugInfoDWARF + Object + Support + Demangle + + LINK_LIBS + lldCore + lldYAML + ${PTHREAD_LIB} + ) + +include_directories(.) diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp new file mode 100644 index 000000000000..49d518456a45 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp @@ -0,0 +1,582 @@ +//===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file A pass to convert MachO's __compact_unwind sections into the final +/// __unwind_info format used during runtime. 
See +/// mach-o/compact_unwind_encoding.h for more details on the formats involved. +/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" + +#define DEBUG_TYPE "macho-compact-unwind" + +namespace lld { +namespace mach_o { + +namespace { +struct CompactUnwindEntry { + const Atom *rangeStart; + const Atom *personalityFunction; + const Atom *lsdaLocation; + const Atom *ehFrame; + + uint32_t rangeLength; + + // There are 3 types of compact unwind entry, distinguished by the encoding + // value: 0 indicates a function with no unwind info; + // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to + // __eh_frame, and that the ehFrame entry will be valid; any other value is a + // real compact unwind entry -- personalityFunction will be set and + // lsdaLocation may be. 
+ uint32_t encoding; + + CompactUnwindEntry(const DefinedAtom *function) + : rangeStart(function), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()), + encoding(0) {} + + CompactUnwindEntry() + : rangeStart(nullptr), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {} +}; + +struct UnwindInfoPage { + ArrayRef<CompactUnwindEntry> entries; +}; +} + +class UnwindInfoAtom : public SimpleDefinedAtom { +public: + UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig, + std::vector<const Atom *> &personalities, + std::vector<uint32_t> &commonEncodings, + std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) + : SimpleDefinedAtom(file), _archHandler(archHandler), + _commonEncodingsOffset(7 * sizeof(uint32_t)), + _personalityArrayOffset(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)), + _topLevelIndexOffset(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)), + _lsdaIndexOffset(_topLevelIndexOffset + + 3 * (pages.size() + 1) * sizeof(uint32_t)), + _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)), + _isBig(isBig) { + + addHeader(commonEncodings.size(), personalities.size(), pages.size()); + addCommonEncodings(commonEncodings); + addPersonalityFunctions(personalities); + addTopLevelIndexes(pages); + addLSDAIndexes(pages, numLSDAs); + addSecondLevelPages(pages); + } + + ~UnwindInfoAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeProcessedUnwindInfo; + } + + Alignment alignment() const override { return 4; } + + uint64_t size() const override { return _contents.size(); } + + ContentPermissions permissions() const override { + return DefinedAtom::permR__; + } + + ArrayRef<uint8_t> rawContent() const override { return _contents; } + + void addHeader(uint32_t numCommon, uint32_t numPersonalities, + uint32_t numPages) { + using normalized::write32; + + 
uint32_t headerSize = 7 * sizeof(uint32_t); + _contents.resize(headerSize); + + uint8_t *headerEntries = _contents.data(); + // version + write32(headerEntries, 1, _isBig); + // commonEncodingsArraySectionOffset + write32(headerEntries + sizeof(uint32_t), _commonEncodingsOffset, _isBig); + // commonEncodingsArrayCount + write32(headerEntries + 2 * sizeof(uint32_t), numCommon, _isBig); + // personalityArraySectionOffset + write32(headerEntries + 3 * sizeof(uint32_t), _personalityArrayOffset, + _isBig); + // personalityArrayCount + write32(headerEntries + 4 * sizeof(uint32_t), numPersonalities, _isBig); + // indexSectionOffset + write32(headerEntries + 5 * sizeof(uint32_t), _topLevelIndexOffset, _isBig); + // indexCount + write32(headerEntries + 6 * sizeof(uint32_t), numPages + 1, _isBig); + } + + /// Add the list of common encodings to the section; this is simply an array + /// of uint32_t compact values. Size has already been specified in the header. + void addCommonEncodings(std::vector<uint32_t> &commonEncodings) { + using normalized::write32; + + _contents.resize(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)); + uint8_t *commonEncodingsArea = + reinterpret_cast<uint8_t *>(_contents.data() + _commonEncodingsOffset); + + for (uint32_t encoding : commonEncodings) { + write32(commonEncodingsArea, encoding, _isBig); + commonEncodingsArea += sizeof(uint32_t); + } + } + + void addPersonalityFunctions(std::vector<const Atom *> personalities) { + _contents.resize(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)); + + for (unsigned i = 0; i < personalities.size(); ++i) + addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t), + personalities[i]); + } + + void addTopLevelIndexes(std::vector<UnwindInfoPage> &pages) { + using normalized::write32; + + uint32_t numIndexes = pages.size() + 1; + _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t)); + + uint32_t pageLoc = _firstPageOffset; + + // 
The most difficult job here is calculating the LSDAs; everything else + // follows fairly naturally, but we can't state where the first + uint8_t *indexData = &_contents[_topLevelIndexOffset]; + uint32_t numLSDAs = 0; + for (unsigned i = 0; i < pages.size(); ++i) { + // functionOffset + addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t), + pages[i].entries[0].rangeStart); + // secondLevelPagesSectionOffset + write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig); + write32(indexData + (3 * i + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + + for (auto &entry : pages[i].entries) + if (entry.lsdaLocation) + ++numLSDAs; + } + + // Finally, write out the final sentinel index + auto &finalEntry = pages[pages.size() - 1].entries.back(); + addImageReference(_topLevelIndexOffset + + 3 * pages.size() * sizeof(uint32_t), + finalEntry.rangeStart, finalEntry.rangeLength); + // secondLevelPagesSectionOffset => 0 + write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + } + + void addLSDAIndexes(std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) { + _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t)); + + uint32_t curOffset = _lsdaIndexOffset; + for (auto &page : pages) { + for (auto &entry : page.entries) { + if (!entry.lsdaLocation) + continue; + + addImageReference(curOffset, entry.rangeStart); + addImageReference(curOffset + sizeof(uint32_t), entry.lsdaLocation); + curOffset += 2 * sizeof(uint32_t); + } + } + } + + void addSecondLevelPages(std::vector<UnwindInfoPage> &pages) { + for (auto &page : pages) { + addRegularSecondLevelPage(page); + } + } + + void addRegularSecondLevelPage(const UnwindInfoPage &page) { + uint32_t curPageOffset = _contents.size(); + const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t); + uint32_t curPageSize = + headerSize + 2 * page.entries.size() * sizeof(uint32_t); + 
_contents.resize(curPageOffset + curPageSize); + + using normalized::write32; + using normalized::write16; + // 2 => regular page + write32(&_contents[curPageOffset], 2, _isBig); + // offset of 1st entry + write16(&_contents[curPageOffset + 4], headerSize, _isBig); + write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig); + + uint32_t pagePos = curPageOffset + headerSize; + for (auto &entry : page.entries) { + addImageReference(pagePos, entry.rangeStart); + + write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding, + _isBig); + if ((entry.encoding & 0x0f000000U) == + _archHandler.dwarfCompactUnwindType()) + addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame); + + pagePos += 2 * sizeof(uint32_t); + } + } + + void addEhFrameReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.unwindRefToEhFrameKind(), offset, dest, addend); + } + + void addImageReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKind(), offset, dest, addend); + } + + void addImageReferenceIndirect(uint32_t offset, const Atom *dest) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKindIndirect(), offset, dest, 0); + } + +private: + mach_o::ArchHandler &_archHandler; + std::vector<uint8_t> _contents; + uint32_t _commonEncodingsOffset; + uint32_t _personalityArrayOffset; + uint32_t _topLevelIndexOffset; + uint32_t _lsdaIndexOffset; + uint32_t _firstPageOffset; + bool _isBig; +}; + +/// Pass for instantiating and optimizing GOT slots. 
+/// +class CompactUnwindPass : public Pass { +public: + CompactUnwindPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _file(*_ctx.make_file<MachOFile>("<mach-o Compact Unwind Pass>")), + _isBig(MachOLinkingContext::isBigEndian(_ctx.arch())) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + +private: + llvm::Error perform(SimpleFile &mergedFile) override { + DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n"); + + std::map<const Atom *, CompactUnwindEntry> unwindLocs; + std::map<const Atom *, const Atom *> dwarfFrames; + std::vector<const Atom *> personalities; + uint32_t numLSDAs = 0; + + // First collect all __compact_unwind and __eh_frame entries, addressable by + // the function referred to. + collectCompactUnwindEntries(mergedFile, unwindLocs, personalities, + numLSDAs); + + collectDwarfFrameEntries(mergedFile, dwarfFrames); + + // Skip rest of pass if no unwind info. + if (unwindLocs.empty() && dwarfFrames.empty()) + return llvm::Error::success(); + + // FIXME: if there are more than 4 personality functions then we need to + // defer to DWARF info for the ones we don't put in the list. They should + // also probably be sorted by frequency. + assert(personalities.size() <= 4); + + // TODO: Find commmon encodings for use by compressed pages. + std::vector<uint32_t> commonEncodings; + + // Now sort the entries by final address and fixup the compact encoding to + // its final form (i.e. set personality function bits & create DWARF + // references where needed). + std::vector<CompactUnwindEntry> unwindInfos = createUnwindInfoEntries( + mergedFile, unwindLocs, personalities, dwarfFrames); + + // Remove any unused eh-frame atoms. + pruneUnusedEHFrames(mergedFile, unwindInfos, unwindLocs, dwarfFrames); + + // Finally, we can start creating pages based on these entries. 
+ + DEBUG(llvm::dbgs() << " Splitting entries into pages\n"); + // FIXME: we split the entries into pages naively: lots of 4k pages followed + // by a small one. ld64 tried to minimize space and align them to real 4k + // boundaries. That might be worth doing, or perhaps we could perform some + // minor balancing for expected number of lookups. + std::vector<UnwindInfoPage> pages; + auto remainingInfos = llvm::makeArrayRef(unwindInfos); + do { + pages.push_back(UnwindInfoPage()); + + // FIXME: we only create regular pages at the moment. These can hold up to + // 1021 entries according to the documentation. + unsigned entriesInPage = std::min(1021U, (unsigned)remainingInfos.size()); + + pages.back().entries = remainingInfos.slice(0, entriesInPage); + remainingInfos = remainingInfos.slice(entriesInPage); + + DEBUG(llvm::dbgs() + << " Page from " << pages.back().entries[0].rangeStart->name() + << " to " << pages.back().entries.back().rangeStart->name() << " + " + << llvm::format("0x%x", pages.back().entries.back().rangeLength) + << " has " << entriesInPage << " entries\n"); + } while (!remainingInfos.empty()); + + auto *unwind = new (_file.allocator()) + UnwindInfoAtom(_archHandler, _file, _isBig, personalities, + commonEncodings, pages, numLSDAs); + mergedFile.addAtom(*unwind); + + // Finally, remove all __compact_unwind atoms now that we've processed them. 
+ mergedFile.removeDefinedAtomsIf([](const DefinedAtom *atom) { + return atom->contentType() == DefinedAtom::typeCompactUnwindInfo; + }); + + return llvm::Error::success(); + } + + void collectCompactUnwindEntries( + const SimpleFile &mergedFile, + std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + std::vector<const Atom *> &personalities, uint32_t &numLSDAs) { + DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n"); + + for (const DefinedAtom *atom : mergedFile.defined()) { + if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo) + continue; + + auto unwindEntry = extractCompactUnwindEntry(atom); + unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry)); + + DEBUG(llvm::dbgs() << " Entry for " << unwindEntry.rangeStart->name() + << ", encoding=" + << llvm::format("0x%08x", unwindEntry.encoding)); + if (unwindEntry.personalityFunction) + DEBUG(llvm::dbgs() << ", personality=" + << unwindEntry.personalityFunction->name() + << ", lsdaLoc=" << unwindEntry.lsdaLocation->name()); + DEBUG(llvm::dbgs() << '\n'); + + // Count number of LSDAs we see, since we need to know how big the index + // will be while laying out the section. + if (unwindEntry.lsdaLocation) + ++numLSDAs; + + // Gather the personality functions now, so that they're in deterministic + // order (derived from the DefinedAtom order). + if (unwindEntry.personalityFunction) { + auto pFunc = std::find(personalities.begin(), personalities.end(), + unwindEntry.personalityFunction); + if (pFunc == personalities.end()) + personalities.push_back(unwindEntry.personalityFunction); + } + } + } + + CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) { + CompactUnwindEntry entry; + + for (const Reference *ref : *atom) { + switch (ref->offsetInAtom()) { + case 0: + // FIXME: there could legitimately be functions with multiple encoding + // entries. However, nothing produces them at the moment. 
+ assert(ref->addend() == 0 && "unexpected offset into function"); + entry.rangeStart = ref->target(); + break; + case 0x10: + assert(ref->addend() == 0 && "unexpected offset into personality fn"); + entry.personalityFunction = ref->target(); + break; + case 0x18: + assert(ref->addend() == 0 && "unexpected offset into LSDA atom"); + entry.lsdaLocation = ref->target(); + break; + } + } + + if (atom->rawContent().size() < 4 * sizeof(uint32_t)) + return entry; + + using normalized::read32; + entry.rangeLength = + read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig); + entry.encoding = + read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig); + return entry; + } + + void + collectDwarfFrameEntries(const SimpleFile &mergedFile, + std::map<const Atom *, const Atom *> &dwarfFrames) { + for (const DefinedAtom *ehFrameAtom : mergedFile.defined()) { + if (ehFrameAtom->contentType() != DefinedAtom::typeCFI) + continue; + if (ArchHandler::isDwarfCIE(_isBig, ehFrameAtom)) + continue; + + if (const Atom *function = _archHandler.fdeTargetFunction(ehFrameAtom)) + dwarfFrames[function] = ehFrameAtom; + } + } + + /// Every atom defined in __TEXT,__text needs an entry in the final + /// __unwind_info section (in order). These comes from two sources: + /// + Input __compact_unwind sections where possible (after adding the + /// personality function offset which is only known now). + /// + A synthesised reference to __eh_frame if there's no __compact_unwind + /// or too many personality functions to be accommodated. 
+ std::vector<CompactUnwindEntry> createUnwindInfoEntries( + const SimpleFile &mergedFile, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::vector<const Atom *> &personalities, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + std::vector<CompactUnwindEntry> unwindInfos; + + DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n"); + // The final order in the __unwind_info section must be derived from the + // order of typeCode atoms, since that's how they'll be put into the object + // file eventually (yuck!). + for (const DefinedAtom *atom : mergedFile.defined()) { + if (atom->contentType() != DefinedAtom::typeCode) + continue; + + unwindInfos.push_back(finalizeUnwindInfoEntryForAtom( + atom, unwindLocs, personalities, dwarfFrames)); + + DEBUG(llvm::dbgs() << " Entry for " << atom->name() + << ", final encoding=" + << llvm::format("0x%08x", unwindInfos.back().encoding) + << '\n'); + } + + return unwindInfos; + } + + /// Remove unused EH frames. + /// + /// An EH frame is considered unused if there is a corresponding compact + /// unwind atom that doesn't require the EH frame. + void pruneUnusedEHFrames( + SimpleFile &mergedFile, + const std::vector<CompactUnwindEntry> &unwindInfos, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + + // Worklist of all 'used' FDEs. + std::vector<const DefinedAtom *> usedDwarfWorklist; + + // We have to check two conditions when building the worklist: + // (1) EH frames used by compact unwind entries. + for (auto &entry : unwindInfos) + if (entry.ehFrame) + usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.ehFrame)); + + // (2) EH frames that reference functions with no corresponding compact + // unwind info. 
+ for (auto &entry : dwarfFrames) + if (!unwindLocs.count(entry.first)) + usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.second)); + + // Add all transitively referenced CFI atoms by processing the worklist. + std::set<const Atom *> usedDwarfFrames; + while (!usedDwarfWorklist.empty()) { + const DefinedAtom *cfiAtom = usedDwarfWorklist.back(); + usedDwarfWorklist.pop_back(); + usedDwarfFrames.insert(cfiAtom); + for (const auto *ref : *cfiAtom) { + const DefinedAtom *cfiTarget = dyn_cast<DefinedAtom>(ref->target()); + if (cfiTarget->contentType() == DefinedAtom::typeCFI) + usedDwarfWorklist.push_back(cfiTarget); + } + } + + // Finally, delete all unreferenced CFI atoms. + mergedFile.removeDefinedAtomsIf([&](const DefinedAtom *atom) { + if ((atom->contentType() == DefinedAtom::typeCFI) && + !usedDwarfFrames.count(atom)) + return true; + return false; + }); + } + + CompactUnwindEntry finalizeUnwindInfoEntryForAtom( + const DefinedAtom *function, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::vector<const Atom *> &personalities, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + auto unwindLoc = unwindLocs.find(function); + + CompactUnwindEntry entry; + if (unwindLoc == unwindLocs.end()) { + // Default entry has correct encoding (0 => no unwind), but we need to + // synthesise the function. + entry.rangeStart = function; + entry.rangeLength = function->size(); + } else + entry = unwindLoc->second; + + + // If there's no __compact_unwind entry, or it explicitly says to use + // __eh_frame, we need to try and fill in the correct DWARF atom. 
+ if (entry.encoding == _archHandler.dwarfCompactUnwindType() || + entry.encoding == 0) { + auto dwarfFrame = dwarfFrames.find(function); + if (dwarfFrame != dwarfFrames.end()) { + entry.encoding = _archHandler.dwarfCompactUnwindType(); + entry.ehFrame = dwarfFrame->second; + } + } + + auto personality = std::find(personalities.begin(), personalities.end(), + entry.personalityFunction); + uint32_t personalityIdx = personality == personalities.end() + ? 0 + : personality - personalities.begin() + 1; + + // FIXME: We should also use DWARF when there isn't enough room for the + // personality function in the compact encoding. + assert(personalityIdx < 4 && "too many personality functions"); + + entry.encoding |= personalityIdx << 28; + + if (entry.lsdaLocation) + entry.encoding |= 1U << 30; + + return entry; + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + MachOFile &_file; + bool _isBig; +}; + +void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsCompactUnwindPass()); + pm.add(llvm::make_unique<CompactUnwindPass>(ctx)); +} + +} // end namesapce mach_o +} // end namesapce lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/DebugInfo.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/DebugInfo.h new file mode 100644 index 000000000000..28e41bf4263c --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/DebugInfo.h @@ -0,0 +1,106 @@ +//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_DEBUGINFO_H +#define LLD_READER_WRITER_MACHO_DEBUGINFO_H + +#include "lld/Core/Atom.h" +#include <vector> + +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + + +namespace lld { +namespace mach_o { + +class DebugInfo { +public: + enum class Kind { + Dwarf, + Stabs + }; + + Kind kind() const { return _kind; } + + void setAllocator(std::unique_ptr<llvm::BumpPtrAllocator> allocator) { + _allocator = std::move(allocator); + } + +protected: + DebugInfo(Kind kind) : _kind(kind) {} + +private: + std::unique_ptr<llvm::BumpPtrAllocator> _allocator; + Kind _kind; +}; + +struct TranslationUnitSource { + StringRef name; + StringRef path; +}; + +class DwarfDebugInfo : public DebugInfo { +public: + DwarfDebugInfo(TranslationUnitSource tu) + : DebugInfo(Kind::Dwarf), _tu(std::move(tu)) {} + + static inline bool classof(const DebugInfo *di) { + return di->kind() == Kind::Dwarf; + } + + const TranslationUnitSource &translationUnitSource() const { return _tu; } + +private: + TranslationUnitSource _tu; +}; + +struct Stab { + Stab(const Atom* atom, uint8_t type, uint8_t other, uint16_t desc, + uint32_t value, StringRef str) + : atom(atom), type(type), other(other), desc(desc), value(value), + str(str) {} + + const class Atom* atom; + uint8_t type; + uint8_t other; + uint16_t desc; + uint32_t value; + StringRef str; +}; + +inline raw_ostream& operator<<(raw_ostream &os, Stab &s) { + os << "Stab -- atom: " << llvm::format("%p", s.atom) << ", type: " << (uint32_t)s.type + << ", other: " << (uint32_t)s.other << ", desc: " << s.desc << ", value: " << s.value + << ", str: '" << s.str << "'"; + return os; +} + +class StabsDebugInfo : public DebugInfo { +public: + + typedef std::vector<Stab> StabsList; + + StabsDebugInfo(StabsList stabs) + : DebugInfo(Kind::Stabs), _stabs(std::move(stabs)) {} + + static inline bool classof(const DebugInfo *di) { + 
return di->kind() == Kind::Stabs; + } + + const StabsList& stabs() const { return _stabs; } + +public: + StabsList _stabs; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_DEBUGINFO_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h new file mode 100644 index 000000000000..acced33b7e74 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h @@ -0,0 +1,155 @@ +//===- lib/ReaderWriter/MachO/ExecutableAtoms.h ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H +#define LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H + +#include "Atoms.h" +#include "File.h" + +#include "llvm/Support/MachO.h" + +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/Core/UndefinedAtom.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + + +// +// CEntryFile adds an UndefinedAtom for "_main" so that the Resolving +// phase will fail if "_main" is undefined. +// +class CEntryFile : public SimpleFile { +public: + CEntryFile(const MachOLinkingContext &context) + : SimpleFile("C entry", kindCEntryObject), + _undefMain(*this, context.entrySymbolName()) { + this->addAtom(_undefMain); + } + +private: + SimpleUndefinedAtom _undefMain; +}; + + +// +// StubHelperFile adds an UndefinedAtom for "dyld_stub_binder" so that +// the Resolveing phase will fail if "dyld_stub_binder" is undefined. 
+// +class StubHelperFile : public SimpleFile { +public: + StubHelperFile(const MachOLinkingContext &context) + : SimpleFile("stub runtime", kindStubHelperObject), + _undefBinder(*this, context.binderSymbolName()) { + this->addAtom(_undefBinder); + } + +private: + SimpleUndefinedAtom _undefBinder; +}; + + +// +// MachHeaderAliasFile lazily instantiates the magic symbols that mark the start +// of the mach_header for final linked images. +// +class MachHeaderAliasFile : public SimpleFile { +public: + MachHeaderAliasFile(const MachOLinkingContext &context) + : SimpleFile("mach_header symbols", kindHeaderObject) { + StringRef machHeaderSymbolName; + DefinedAtom::Scope symbolScope = DefinedAtom::scopeLinkageUnit; + StringRef dsoHandleName; + switch (context.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + machHeaderSymbolName = "__mh_object_header"; + break; + case llvm::MachO::MH_EXECUTE: + machHeaderSymbolName = "__mh_execute_header"; + symbolScope = DefinedAtom::scopeGlobal; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_FVMLIB: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_CORE: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_PRELOAD: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_DYLIB: + machHeaderSymbolName = "__mh_dylib_header"; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_DYLINKER: + machHeaderSymbolName = "__mh_dylinker_header"; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_BUNDLE: + machHeaderSymbolName = "__mh_bundle_header"; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_DYLIB_STUB: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_DSYM: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_KEXT_BUNDLE: + dsoHandleName = "___dso_handle"; + break; + } + if (!machHeaderSymbolName.empty()) + 
_definedAtoms.push_back(new (allocator()) MachODefinedAtom( + *this, machHeaderSymbolName, symbolScope, + DefinedAtom::typeMachHeader, DefinedAtom::mergeNo, false, + true /* noDeadStrip */, + ArrayRef<uint8_t>(), DefinedAtom::Alignment(4096))); + + if (!dsoHandleName.empty()) + _definedAtoms.push_back(new (allocator()) MachODefinedAtom( + *this, dsoHandleName, DefinedAtom::scopeLinkageUnit, + DefinedAtom::typeDSOHandle, DefinedAtom::mergeNo, false, + true /* noDeadStrip */, + ArrayRef<uint8_t>(), DefinedAtom::Alignment(1))); + } + + const AtomRange<DefinedAtom> defined() const override { + return _definedAtoms; + } + const AtomRange<UndefinedAtom> undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _definedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + + +private: + mutable AtomVector<DefinedAtom> _definedAtoms; +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h new file mode 100644 index 000000000000..2bdd6342b477 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h @@ -0,0 +1,400 @@ +//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_FILE_H +#define LLD_READER_WRITER_MACHO_FILE_H + +#include "Atoms.h" +#include "DebugInfo.h" +#include "MachONormalizedFile.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Format.h" +#include <unordered_map> + +namespace lld { +namespace mach_o { + +using lld::mach_o::normalized::Section; + +class MachOFile : public SimpleFile { +public: + + /// Real file constructor - for on-disk files. + MachOFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx) + : SimpleFile(mb->getBufferIdentifier(), File::kindMachObject), + _mb(std::move(mb)), _ctx(ctx) {} + + /// Dummy file constructor - for virtual files. + MachOFile(StringRef path) + : SimpleFile(path, File::kindMachObject) {} + + void addDefinedAtom(StringRef name, Atom::Scope scope, + DefinedAtom::ContentType type, DefinedAtom::Merge merge, + uint64_t sectionOffset, uint64_t contentSize, bool thumb, + bool noDeadStrip, bool copyRefs, + const Section *inSection) { + assert(sectionOffset+contentSize <= inSection->content.size()); + ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset, + contentSize); + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. 
+ name = name.copy(allocator()); + content = content.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % inSection->alignment); + auto *atom = + new (allocator()) MachODefinedAtom(*this, name, scope, type, merge, + thumb, noDeadStrip, content, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addDefinedAtomInCustomSection(StringRef name, Atom::Scope scope, + DefinedAtom::ContentType type, DefinedAtom::Merge merge, + bool thumb, bool noDeadStrip, uint64_t sectionOffset, + uint64_t contentSize, StringRef sectionName, + bool copyRefs, const Section *inSection) { + assert(sectionOffset+contentSize <= inSection->content.size()); + ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset, + contentSize); + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. + name = name.copy(allocator()); + content = content.copy(allocator()); + sectionName = sectionName.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % inSection->alignment); + auto *atom = + new (allocator()) MachODefinedCustomSectionAtom(*this, name, scope, type, + merge, thumb, + noDeadStrip, content, + sectionName, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addZeroFillDefinedAtom(StringRef name, Atom::Scope scope, + uint64_t sectionOffset, uint64_t size, + bool noDeadStrip, bool copyRefs, + const Section *inSection) { + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. 
+ name = name.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % inSection->alignment); + + DefinedAtom::ContentType type = DefinedAtom::typeUnknown; + switch (inSection->type) { + case llvm::MachO::S_ZEROFILL: + type = DefinedAtom::typeZeroFill; + break; + case llvm::MachO::S_THREAD_LOCAL_ZEROFILL: + type = DefinedAtom::typeTLVInitialZeroFill; + break; + default: + llvm_unreachable("Unrecognized zero-fill section"); + } + + auto *atom = + new (allocator()) MachODefinedAtom(*this, name, scope, type, size, + noDeadStrip, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addUndefinedAtom(StringRef name, bool copyRefs) { + if (copyRefs) { + // Make a copy of the atom's name that is owned by this file. + name = name.copy(allocator()); + } + auto *atom = new (allocator()) SimpleUndefinedAtom(*this, name); + addAtom(*atom); + _undefAtoms[name] = atom; + } + + void addTentativeDefAtom(StringRef name, Atom::Scope scope, uint64_t size, + DefinedAtom::Alignment align, bool copyRefs) { + if (copyRefs) { + // Make a copy of the atom's name that is owned by this file. + name = name.copy(allocator()); + } + auto *atom = + new (allocator()) MachOTentativeDefAtom(*this, name, scope, size, align); + addAtom(*atom); + _undefAtoms[name] = atom; + } + + /// Search this file for an the atom from 'section' that covers + /// 'offsetInSect'. Returns nullptr is no atom found. + MachODefinedAtom *findAtomCoveringAddress(const Section §ion, + uint64_t offsetInSect, + uint32_t *foundOffsetAtom=nullptr) { + const auto &pos = _sectionAtoms.find(§ion); + if (pos == _sectionAtoms.end()) + return nullptr; + const auto &vec = pos->second; + assert(offsetInSect < section.content.size()); + // Vector of atoms for section are already sorted, so do binary search. 
+ const auto &atomPos = std::lower_bound(vec.begin(), vec.end(), offsetInSect, + [offsetInSect](const SectionOffsetAndAtom &ao, + uint64_t targetAddr) -> bool { + // Each atom has a start offset of its slice of the + // section's content. This compare function must return true + // iff the atom's range is before the offset being searched for. + uint64_t atomsEndOffset = ao.offset+ao.atom->rawContent().size(); + return (atomsEndOffset <= offsetInSect); + }); + if (atomPos == vec.end()) + return nullptr; + if (foundOffsetAtom) + *foundOffsetAtom = offsetInSect - atomPos->offset; + return atomPos->atom; + } + + /// Searches this file for an UndefinedAtom named 'name'. Returns + /// nullptr is no such atom found. + const lld::Atom *findUndefAtom(StringRef name) { + auto pos = _undefAtoms.find(name); + if (pos == _undefAtoms.end()) + return nullptr; + return pos->second; + } + + typedef std::function<void (MachODefinedAtom* atom)> DefinedAtomVisitor; + + void eachDefinedAtom(DefinedAtomVisitor vistor) { + for (auto §AndAtoms : _sectionAtoms) { + for (auto &offAndAtom : sectAndAtoms.second) { + vistor(offAndAtom.atom); + } + } + } + + typedef std::function<void(MachODefinedAtom *atom, uint64_t offset)> + SectionAtomVisitor; + + void eachAtomInSection(const Section §ion, SectionAtomVisitor visitor) { + auto pos = _sectionAtoms.find(§ion); + if (pos == _sectionAtoms.end()) + return; + auto vec = pos->second; + + for (auto &offAndAtom : vec) + visitor(offAndAtom.atom, offAndAtom.offset); + } + + MachOLinkingContext::Arch arch() const { return _arch; } + void setArch(MachOLinkingContext::Arch arch) { _arch = arch; } + + MachOLinkingContext::OS OS() const { return _os; } + void setOS(MachOLinkingContext::OS os) { _os = os; } + + MachOLinkingContext::ObjCConstraint objcConstraint() const { + return _objcConstraint; + } + void setObjcConstraint(MachOLinkingContext::ObjCConstraint v) { + _objcConstraint = v; + } + + uint32_t minVersion() const { return _minVersion; } + void 
setMinVersion(uint32_t v) { _minVersion = v; } + + LoadCommandType minVersionLoadCommandKind() const { + return _minVersionLoadCommandKind; + } + void setMinVersionLoadCommandKind(LoadCommandType v) { + _minVersionLoadCommandKind = v; + } + + uint32_t swiftVersion() const { return _swiftVersion; } + void setSwiftVersion(uint32_t v) { _swiftVersion = v; } + + bool subsectionsViaSymbols() const { + return _flags & llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; + } + void setFlags(normalized::FileFlags v) { _flags = v; } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const File *F) { + return F->kind() == File::kindMachObject; + } + + void setDebugInfo(std::unique_ptr<DebugInfo> debugInfo) { + _debugInfo = std::move(debugInfo); + } + + DebugInfo* debugInfo() const { return _debugInfo.get(); } + std::unique_ptr<DebugInfo> takeDebugInfo() { return std::move(_debugInfo); } + +protected: + std::error_code doParse() override { + // Convert binary file to normalized mach-o. + auto normFile = normalized::readBinary(_mb, _ctx->arch()); + if (auto ec = normFile.takeError()) + return llvm::errorToErrorCode(std::move(ec)); + // Convert normalized mach-o to atoms. 
+ if (auto ec = normalized::normalizedObjectToAtoms(this, **normFile, false)) + return llvm::errorToErrorCode(std::move(ec)); + return std::error_code(); + } + +private: + struct SectionOffsetAndAtom { uint64_t offset; MachODefinedAtom *atom; }; + + void addAtomForSection(const Section *inSection, MachODefinedAtom* atom, + uint64_t sectionOffset) { + SectionOffsetAndAtom offAndAtom; + offAndAtom.offset = sectionOffset; + offAndAtom.atom = atom; + _sectionAtoms[inSection].push_back(offAndAtom); + addAtom(*atom); + } + + typedef llvm::DenseMap<const normalized::Section *, + std::vector<SectionOffsetAndAtom>> SectionToAtoms; + typedef llvm::StringMap<const lld::Atom *> NameToAtom; + + std::unique_ptr<MemoryBuffer> _mb; + MachOLinkingContext *_ctx; + SectionToAtoms _sectionAtoms; + NameToAtom _undefAtoms; + MachOLinkingContext::Arch _arch = MachOLinkingContext::arch_unknown; + MachOLinkingContext::OS _os = MachOLinkingContext::OS::unknown; + uint32_t _minVersion = 0; + LoadCommandType _minVersionLoadCommandKind = (LoadCommandType)0; + MachOLinkingContext::ObjCConstraint _objcConstraint = + MachOLinkingContext::objc_unknown; + uint32_t _swiftVersion = 0; + normalized::FileFlags _flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; + std::unique_ptr<DebugInfo> _debugInfo; +}; + +class MachODylibFile : public SharedLibraryFile { +public: + MachODylibFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx) + : SharedLibraryFile(mb->getBufferIdentifier()), + _mb(std::move(mb)), _ctx(ctx) {} + + MachODylibFile(StringRef path) : SharedLibraryFile(path) {} + + OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const override { + // Pass down _installName so that if this requested symbol + // is re-exported through this dylib, the SharedLibraryAtom's loadName() + // is this dylib installName and not the implementation dylib's. + // NOTE: isData is not needed for dylibs (it matters for static libs). 
+ return exports(name, _installName); + } + + /// Adds symbol name that this dylib exports. The corresponding + /// SharedLibraryAtom is created lazily (since most symbols are not used). + void addExportedSymbol(StringRef name, bool weakDef, bool copyRefs) { + if (copyRefs) { + name = name.copy(allocator()); + } + AtomAndFlags info(weakDef); + _nameToAtom[name] = info; + } + + void addReExportedDylib(StringRef dylibPath) { + _reExportedDylibs.emplace_back(dylibPath); + } + + StringRef installName() const { return _installName; } + uint32_t currentVersion() { return _currentVersion; } + uint32_t compatVersion() { return _compatVersion; } + + void setInstallName(StringRef name) { _installName = name; } + void setCompatVersion(uint32_t version) { _compatVersion = version; } + void setCurrentVersion(uint32_t version) { _currentVersion = version; } + + typedef std::function<MachODylibFile *(StringRef)> FindDylib; + + void loadReExportedDylibs(FindDylib find) { + for (ReExportedDylib &entry : _reExportedDylibs) { + entry.file = find(entry.path); + } + } + + StringRef getDSOName() const override { return _installName; } + + std::error_code doParse() override { + // Convert binary file to normalized mach-o. + auto normFile = normalized::readBinary(_mb, _ctx->arch()); + if (auto ec = normFile.takeError()) + return llvm::errorToErrorCode(std::move(ec)); + // Convert normalized mach-o to atoms. + if (auto ec = normalized::normalizedDylibToAtoms(this, **normFile, false)) + return llvm::errorToErrorCode(std::move(ec)); + return std::error_code(); + } + +private: + OwningAtomPtr<SharedLibraryAtom> exports(StringRef name, + StringRef installName) const { + // First, check if requested symbol is directly implemented by this dylib. + auto entry = _nameToAtom.find(name); + if (entry != _nameToAtom.end()) { + // FIXME: Make this map a set and only used in assert builds. 
+ // Note, its safe to assert here as the resolver is the only client of + // this API and it only requests exports for undefined symbols. + // If we return from here we are no longer undefined so we should never + // get here again. + assert(!entry->second.atom && "Duplicate shared library export"); + bool weakDef = entry->second.weakDef; + auto *atom = new (allocator()) MachOSharedLibraryAtom(*this, name, + installName, + weakDef); + entry->second.atom = atom; + return atom; + } + + // Next, check if symbol is implemented in some re-exported dylib. + for (const ReExportedDylib &dylib : _reExportedDylibs) { + assert(dylib.file); + auto atom = dylib.file->exports(name, installName); + if (atom.get()) + return atom; + } + + // Symbol not exported or re-exported by this dylib. + return nullptr; + } + + struct ReExportedDylib { + ReExportedDylib(StringRef p) : path(p), file(nullptr) { } + StringRef path; + MachODylibFile *file; + }; + + struct AtomAndFlags { + AtomAndFlags() : atom(nullptr), weakDef(false) { } + AtomAndFlags(bool weak) : atom(nullptr), weakDef(weak) { } + const SharedLibraryAtom *atom; + bool weakDef; + }; + + std::unique_ptr<MemoryBuffer> _mb; + MachOLinkingContext *_ctx; + StringRef _installName; + uint32_t _currentVersion; + uint32_t _compatVersion; + std::vector<ReExportedDylib> _reExportedDylibs; + mutable std::unordered_map<StringRef, AtomAndFlags> _nameToAtom; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_FILE_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h new file mode 100644 index 000000000000..76d295841c9d --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h @@ -0,0 +1,61 @@ +//===- lib/ReaderWriter/MachO/FlatNamespaceFile.h -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H +#define LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H + +#include "lld/Core/SharedLibraryFile.h" +#include "llvm/Support/Debug.h" + +namespace lld { +namespace mach_o { + +// +// A FlateNamespaceFile instance may be added as a resolution source of last +// resort, depending on how -flat_namespace and -undefined are set. +// +class FlatNamespaceFile : public SharedLibraryFile { +public: + FlatNamespaceFile(const MachOLinkingContext &context) + : SharedLibraryFile("flat namespace") { } + + OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const override { + return new (allocator()) MachOSharedLibraryAtom(*this, name, getDSOName(), + false); + } + + StringRef getDSOName() const override { return "flat-namespace"; } + + const AtomRange<DefinedAtom> defined() const override { + return _noDefinedAtoms; + } + const AtomRange<UndefinedAtom> undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _noDefinedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp new file mode 100644 index 000000000000..8458a1c79282 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp @@ -0,0 +1,184 @@ +//===- lib/ReaderWriter/MachO/GOTPass.cpp -----------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University 
of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This linker pass transforms all GOT kind references to real references. +/// That is, in assembly you can write something like: +/// movq foo@GOTPCREL(%rip), %rax +/// which means you want to load a pointer to "foo" out of the GOT (Global +/// Offset Table). In the object file, the Atom containing this instruction +/// has a Reference whose target is an Atom named "foo" and the Reference +/// kind is a GOT load. The linker needs to instantiate a pointer-sized +/// GOT entry. This is done by creating a GOT Atom to represent that pointer- +/// sized data in this pass, and altering the Atom graph so the Reference now +/// points to the GOT Atom entry (corresponding to "foo") and changing the +/// Reference Kind to reflect it is now pointing to a GOT entry (rather +/// than needing a GOT entry). +/// +/// There is one optimization the linker can do here. If the target of the GOT +/// is in the same linkage unit and does not need to be interposable, and +/// the GOT use is just a load (not some other operation), this pass can +/// transform that load into an LEA (add). This optimizes away one memory load +/// which at runtime could stall the pipeline. This optimization only +/// works for architectures in which a (GOT) load instruction can be changed to +/// an LEA instruction that is the same size. The method isGOTAccess() should +/// only return true for "canBypassGOT" if this optimization is supported.
+/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +// +// GOT Entry Atom created by the GOT pass. +// +class GOTEntryAtom : public SimpleDefinedAtom { +public: + GOTEntryAtom(const File &file, bool is64, StringRef name) + : SimpleDefinedAtom(file), _is64(is64), _name(name) { } + + ~GOTEntryAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeGOT; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + + StringRef slotName() const { + return _name; + } + +private: + const bool _is64; + StringRef _name; +}; + +/// Pass for instantiating and optimizing GOT slots. +/// +class GOTPass : public Pass { +public: + GOTPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _file(*_ctx.make_file<MachOFile>("<mach-o GOT Pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + +private: + llvm::Error perform(SimpleFile &mergedFile) override { + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + // Look at instructions accessing the GOT. 
+ bool canBypassGOT; + if (!_archHandler.isGOTAccess(*ref, canBypassGOT)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + + if (!shouldReplaceTargetWithGOTAtom(target, canBypassGOT)) { + // Update reference kind to reflect that target is a direct accesss. + _archHandler.updateReferenceToGOT(ref, false); + } else { + // Replace the target with a reference to a GOT entry. + const DefinedAtom *gotEntry = makeGOTEntry(target); + const_cast<Reference *>(ref)->setTarget(gotEntry); + // Update reference kind to reflect that target is now a GOT entry. + _archHandler.updateReferenceToGOT(ref, true); + } + } + } + + // Sort and add all created GOT Atoms to master file + std::vector<const GOTEntryAtom *> entries; + entries.reserve(_targetToGOT.size()); + for (auto &it : _targetToGOT) + entries.push_back(it.second); + std::sort(entries.begin(), entries.end(), + [](const GOTEntryAtom *left, const GOTEntryAtom *right) { + return (left->slotName().compare(right->slotName()) < 0); + }); + for (const GOTEntryAtom *slot : entries) + mergedFile.addAtom(*slot); + + return llvm::Error::success(); + } + + bool shouldReplaceTargetWithGOTAtom(const Atom *target, bool canBypassGOT) { + // Accesses to shared library symbols must go through GOT. + if (isa<SharedLibraryAtom>(target)) + return true; + // Accesses to interposable symbols in same linkage unit must also go + // through GOT. + const DefinedAtom *defTarget = dyn_cast<DefinedAtom>(target); + if (defTarget != nullptr && + defTarget->interposable() != DefinedAtom::interposeNo) { + assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); + return true; + } + // Target does not require indirection. So, if instruction allows GOT to be + // by-passed, do that optimization and don't create GOT entry. 
+ return !canBypassGOT; + } + + const DefinedAtom *makeGOTEntry(const Atom *target) { + auto pos = _targetToGOT.find(target); + if (pos == _targetToGOT.end()) { + auto *gotEntry = new (_file.allocator()) + GOTEntryAtom(_file, _ctx.is64Bit(), target->name()); + _targetToGOT[target] = gotEntry; + const ArchHandler::ReferenceInfo &nlInfo = _archHandler.stubInfo(). + nonLazyPointerReferenceToBinder; + gotEntry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch, + nlInfo.kind, 0, target, 0); + return gotEntry; + } + return pos->second; + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + MachOFile &_file; + llvm::DenseMap<const Atom*, const GOTEntryAtom*> _targetToGOT; +}; + +void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsGOTPass()); + pm.add(llvm::make_unique<GOTPass>(ctx)); +} + +} // end namesapce mach_o +} // end namesapce lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp new file mode 100644 index 000000000000..24dbf79d3e3b --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp @@ -0,0 +1,489 @@ +//===-- ReaderWriter/MachO/LayoutPass.cpp - Layout atoms ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "LayoutPass.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/Parallel.h" +#include "lld/Core/PassManager.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include <algorithm> +#include <set> +#include <utility> + +using namespace lld; + +#define DEBUG_TYPE "LayoutPass" + +namespace lld { +namespace mach_o { + +static bool compareAtoms(const LayoutPass::SortKey &, + const LayoutPass::SortKey &, + LayoutPass::SortOverride customSorter); + +#ifndef NDEBUG +// Return "reason (leftval, rightval)" +static std::string formatReason(StringRef reason, int leftVal, int rightVal) { + return (Twine(reason) + " (" + Twine(leftVal) + ", " + Twine(rightVal) + ")") + .str(); +} + +// Less-than relationship of two atoms must be transitive, which is, if a < b +// and b < c, a < c must be true. This function checks the transitivity by +// checking the sort results. +static void checkTransitivity(std::vector<LayoutPass::SortKey> &vec, + LayoutPass::SortOverride customSorter) { + for (auto i = vec.begin(), e = vec.end(); (i + 1) != e; ++i) { + for (auto j = i + 1; j != e; ++j) { + assert(compareAtoms(*i, *j, customSorter)); + assert(!compareAtoms(*j, *i, customSorter)); + } + } +} + +// Helper functions to check follow-on graph. 
+typedef llvm::DenseMap<const DefinedAtom *, const DefinedAtom *> AtomToAtomT; + +static std::string atomToDebugString(const Atom *atom) { + const DefinedAtom *definedAtom = dyn_cast<DefinedAtom>(atom); + std::string str; + llvm::raw_string_ostream s(str); + if (definedAtom->name().empty()) + s << "<anonymous " << definedAtom << ">"; + else + s << definedAtom->name(); + s << " in "; + if (definedAtom->customSectionName().empty()) + s << "<anonymous>"; + else + s << definedAtom->customSectionName(); + s.flush(); + return str; +} + +static void showCycleDetectedError(const Registry ®istry, + AtomToAtomT &followOnNexts, + const DefinedAtom *atom) { + const DefinedAtom *start = atom; + llvm::dbgs() << "There's a cycle in a follow-on chain!\n"; + do { + llvm::dbgs() << " " << atomToDebugString(atom) << "\n"; + for (const Reference *ref : *atom) { + StringRef kindValStr; + if (!registry.referenceKindToString(ref->kindNamespace(), ref->kindArch(), + ref->kindValue(), kindValStr)) { + kindValStr = "<unknown>"; + } + llvm::dbgs() << " " << kindValStr + << ": " << atomToDebugString(ref->target()) << "\n"; + } + atom = followOnNexts[atom]; + } while (atom != start); + llvm::report_fatal_error("Cycle detected"); +} + +/// Exit if there's a cycle in a followon chain reachable from the +/// given root atom. Uses the tortoise and hare algorithm to detect a +/// cycle. 
+static void checkNoCycleInFollowonChain(const Registry ®istry, + AtomToAtomT &followOnNexts, + const DefinedAtom *root) { + const DefinedAtom *tortoise = root; + const DefinedAtom *hare = followOnNexts[root]; + while (true) { + if (!tortoise || !hare) + return; + if (tortoise == hare) + showCycleDetectedError(registry, followOnNexts, tortoise); + tortoise = followOnNexts[tortoise]; + hare = followOnNexts[followOnNexts[hare]]; + } +} + +static void checkReachabilityFromRoot(AtomToAtomT &followOnRoots, + const DefinedAtom *atom) { + if (!atom) return; + auto i = followOnRoots.find(atom); + if (i == followOnRoots.end()) { + llvm_unreachable(((Twine("Atom <") + atomToDebugString(atom) + + "> has no follow-on root!")) + .str() + .c_str()); + } + const DefinedAtom *ap = i->second; + while (true) { + const DefinedAtom *next = followOnRoots[ap]; + if (!next) { + llvm_unreachable((Twine("Atom <" + atomToDebugString(atom) + + "> is not reachable from its root!")) + .str() + .c_str()); + } + if (next == ap) + return; + ap = next; + } +} + +static void printDefinedAtoms(const File::AtomRange<DefinedAtom> &atomRange) { + for (const DefinedAtom *atom : atomRange) { + llvm::dbgs() << " file=" << atom->file().path() + << ", name=" << atom->name() + << ", size=" << atom->size() + << ", type=" << atom->contentType() + << ", ordinal=" << atom->ordinal() + << "\n"; + } +} + +/// Verify that the followon chain is sane. Should not be called in +/// release binary. +void LayoutPass::checkFollowonChain(const File::AtomRange<DefinedAtom> &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::checkFollowonChain"); + + // Verify that there's no cycle in follow-on chain. + std::set<const DefinedAtom *> roots; + for (const auto &ai : _followOnRoots) + roots.insert(ai.second); + for (const DefinedAtom *root : roots) + checkNoCycleInFollowonChain(_registry, _followOnNexts, root); + + // Verify that all the atoms in followOnNexts have references to + // their roots. 
+ for (const auto &ai : _followOnNexts) { + checkReachabilityFromRoot(_followOnRoots, ai.first); + checkReachabilityFromRoot(_followOnRoots, ai.second); + } +} +#endif // #ifndef NDEBUG + +/// The function compares atoms by sorting atoms in the following order +/// a) Sorts atoms by their ordinal overrides (layout-after/ingroup) +/// b) Sorts atoms by their permissions +/// c) Sorts atoms by their content +/// d) Sorts atoms by custom sorter +/// e) Sorts atoms on how they appear using File Ordinality +/// f) Sorts atoms on how they appear within the File +static bool compareAtomsSub(const LayoutPass::SortKey &lc, + const LayoutPass::SortKey &rc, + LayoutPass::SortOverride customSorter, + std::string &reason) { + const DefinedAtom *left = lc._atom.get(); + const DefinedAtom *right = rc._atom.get(); + if (left == right) { + reason = "same"; + return false; + } + + // Find the root of the chain if it is a part of a follow-on chain. + const DefinedAtom *leftRoot = lc._root; + const DefinedAtom *rightRoot = rc._root; + + // Sort atoms by their ordinal overrides only if they fall in the same + // chain. + if (leftRoot == rightRoot) { + DEBUG(reason = formatReason("override", lc._override, rc._override)); + return lc._override < rc._override; + } + + // Sort same permissions together. + DefinedAtom::ContentPermissions leftPerms = leftRoot->permissions(); + DefinedAtom::ContentPermissions rightPerms = rightRoot->permissions(); + + if (leftPerms != rightPerms) { + DEBUG(reason = + formatReason("contentPerms", (int)leftPerms, (int)rightPerms)); + return leftPerms < rightPerms; + } + + // Sort same content types together. + DefinedAtom::ContentType leftType = leftRoot->contentType(); + DefinedAtom::ContentType rightType = rightRoot->contentType(); + + if (leftType != rightType) { + DEBUG(reason = formatReason("contentType", (int)leftType, (int)rightType)); + return leftType < rightType; + } + + // Use custom sorter if supplied. 
+ if (customSorter) { + bool leftBeforeRight; + if (customSorter(leftRoot, rightRoot, leftBeforeRight)) + return leftBeforeRight; + } + + // Sort by .o order. + const File *leftFile = &leftRoot->file(); + const File *rightFile = &rightRoot->file(); + + if (leftFile != rightFile) { + DEBUG(reason = formatReason(".o order", (int)leftFile->ordinal(), + (int)rightFile->ordinal())); + return leftFile->ordinal() < rightFile->ordinal(); + } + + // Sort by atom order with .o file. + uint64_t leftOrdinal = leftRoot->ordinal(); + uint64_t rightOrdinal = rightRoot->ordinal(); + + if (leftOrdinal != rightOrdinal) { + DEBUG(reason = formatReason("ordinal", (int)leftRoot->ordinal(), + (int)rightRoot->ordinal())); + return leftOrdinal < rightOrdinal; + } + + llvm::errs() << "Unordered: <" << left->name() << "> <" + << right->name() << ">\n"; + llvm_unreachable("Atoms with Same Ordinal!"); +} + +static bool compareAtoms(const LayoutPass::SortKey &lc, + const LayoutPass::SortKey &rc, + LayoutPass::SortOverride customSorter) { + std::string reason; + bool result = compareAtomsSub(lc, rc, customSorter, reason); + DEBUG({ + StringRef comp = result ? "<" : ">="; + llvm::dbgs() << "Layout: '" << lc._atom.get()->name() + << "' " << comp << " '" + << rc._atom.get()->name() << "' (" << reason << ")\n"; + }); + return result; +} + +LayoutPass::LayoutPass(const Registry ®istry, SortOverride sorter) + : _registry(registry), _customSorter(std::move(sorter)) {} + +// Returns the atom immediately followed by the given atom in the followon +// chain. +const DefinedAtom *LayoutPass::findAtomFollowedBy( + const DefinedAtom *targetAtom) { + // Start from the beginning of the chain and follow the chain until + // we find the targetChain. + const DefinedAtom *atom = _followOnRoots[targetAtom]; + while (true) { + const DefinedAtom *prevAtom = atom; + AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); + // The target atom must be in the chain of its root. 
+ assert(targetFollowOnAtomsIter != _followOnNexts.end()); + atom = targetFollowOnAtomsIter->second; + if (atom == targetAtom) + return prevAtom; + } +} + +// Check if all the atoms followed by the given target atom are of size zero. +// When this method is called, an atom being added is not of size zero and +// will be added to the head of the followon chain. All the atoms between the +// atom and the targetAtom (specified by layout-after) need to be of size zero +// in this case. Otherwise the desired layout is impossible. +bool LayoutPass::checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom) { + const DefinedAtom *atom = _followOnRoots[targetAtom]; + while (true) { + if (atom == targetAtom) + return true; + if (atom->size() != 0) + // TODO: print warning that an impossible layout is being desired by the + // user. + return false; + AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); + // The target atom must be in the chain of its root. + assert(targetFollowOnAtomsIter != _followOnNexts.end()); + atom = targetFollowOnAtomsIter->second; + } +} + +// Set the root of all atoms in targetAtom's chain to the given root. +void LayoutPass::setChainRoot(const DefinedAtom *targetAtom, + const DefinedAtom *root) { + // Walk through the followon chain and override each node's root. + while (true) { + _followOnRoots[targetAtom] = root; + AtomToAtomT::iterator targetFollowOnAtomsIter = + _followOnNexts.find(targetAtom); + if (targetFollowOnAtomsIter == _followOnNexts.end()) + return; + targetAtom = targetFollowOnAtomsIter->second; + } +} + +/// This pass builds the followon tables described by two DenseMaps +/// followOnRoots and followonNexts. 
/// The followOnRoots map contains a mapping of a DefinedAtom to its root
/// The followOnNexts map contains a mapping of what DefinedAtom follows the
/// current Atom
/// The algorithm follows a very simple approach
/// a) If the atom is first seen, then make that as the root atom
/// b) The targetAtom which this Atom contains, has the root that's set to the
///    root of the current atom
/// c) If the targetAtom is part of a different tree and the root of the
///    targetAtom is itself, chain all the atoms that are contained in the tree
///    to the current tree
/// d) If the targetAtom is part of a different chain and the root of the
///    targetAtom until the targetAtom has all atoms of size 0, then chain the
///    targetAtoms and its tree to the current chain
///
/// Only references in the 'all' kind namespace with kind value
/// kindLayoutAfter are considered; every other reference is skipped.
void LayoutPass::buildFollowOnTable(const File::AtomRange<DefinedAtom> &range) {
  ScopedTask task(getDefaultDomain(), "LayoutPass::buildFollowOnTable");
  // Set the initial size of the followon and the followonNext hash to the
  // number of atoms that we have.
  _followOnRoots.reserve(range.size());
  _followOnNexts.reserve(range.size());
  for (const DefinedAtom *ai : range) {
    for (const Reference *r : *ai) {
      if (r->kindNamespace() != lld::Reference::KindNamespace::all ||
          r->kindValue() != lld::Reference::kindLayoutAfter)
        continue;
      // NOTE(review): dyn_cast yields null if the target is not a
      // DefinedAtom; the code below does not check for that - confirm that
      // layout-after targets are always DefinedAtoms.
      const DefinedAtom *targetAtom = dyn_cast<DefinedAtom>(r->target());
      _followOnNexts[ai] = targetAtom;

      // If we find a followon for the first time, let's make that atom as the
      // root atom.
      if (_followOnRoots.count(ai) == 0)
        _followOnRoots[ai] = ai;

      auto iter = _followOnRoots.find(targetAtom);
      if (iter == _followOnRoots.end()) {
        // If the targetAtom is not a root of any chain, let's make the root of
        // the targetAtom to the root of the current chain.

        // The expression m[i] = m[j] where m is a DenseMap and i != j is not
        // safe. m[j] returns a reference, which would be invalidated when a
        // rehashing occurs. If rehashing occurs to make room for m[i], m[j]
        // becomes invalid, and that invalid reference would be used as the RHS
        // value of the expression.
        // Copy the value to workaround.
        const DefinedAtom *tmp = _followOnRoots[ai];
        _followOnRoots[targetAtom] = tmp;
        continue;
      }
      if (iter->second == targetAtom) {
        // If the targetAtom is the root of a chain, the chain becomes part of
        // the current chain. Rewrite the subchain's root to the current
        // chain's root.
        setChainRoot(targetAtom, _followOnRoots[ai]);
        continue;
      }
      // The targetAtom is already a part of a chain. If the current atom is
      // of size zero, we can insert it in the middle of the chain just
      // before the target atom, while not breaking other atom's followon
      // relationships. If it's not, we can only insert the current atom at
      // the beginning of the chain. All the atoms followed by the target
      // atom must be of size zero in that case to satisfy the followon
      // relationships.
      size_t currentAtomSize = ai->size();
      if (currentAtomSize == 0) {
        const DefinedAtom *targetPrevAtom = findAtomFollowedBy(targetAtom);
        _followOnNexts[targetPrevAtom] = ai;
        // Same DenseMap hazard as above: copy before assigning through
        // operator[] on the same map.
        const DefinedAtom *tmp = _followOnRoots[targetPrevAtom];
        _followOnRoots[ai] = tmp;
        continue;
      }
      // The requested layout cannot be honored; this 'break' leaves the
      // reference loop, so the remaining references of this atom are not
      // examined.
      if (!checkAllPrevAtomsZeroSize(targetAtom))
        break;
      // Put the current atom in front of the target's chain and re-root that
      // whole chain at the current atom's root.
      _followOnNexts[ai] = _followOnRoots[targetAtom];
      setChainRoot(_followOnRoots[targetAtom], _followOnRoots[ai]);
    }
  }
}

/// Build an ordinal override map by traversing the followon chain, and
/// assigning ordinals to each atom, if the atoms have their ordinals
/// already assigned skip the atom and move to the next.
This is the +/// main map thats used to sort the atoms while comparing two atoms together +void +LayoutPass::buildOrdinalOverrideMap(const File::AtomRange<DefinedAtom> &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::buildOrdinalOverrideMap"); + uint64_t index = 0; + for (const DefinedAtom *ai : range) { + const DefinedAtom *atom = ai; + if (_ordinalOverrideMap.find(atom) != _ordinalOverrideMap.end()) + continue; + AtomToAtomT::iterator start = _followOnRoots.find(atom); + if (start == _followOnRoots.end()) + continue; + for (const DefinedAtom *nextAtom = start->second; nextAtom; + nextAtom = _followOnNexts[nextAtom]) { + AtomToOrdinalT::iterator pos = _ordinalOverrideMap.find(nextAtom); + if (pos == _ordinalOverrideMap.end()) + _ordinalOverrideMap[nextAtom] = index++; + } + } +} + +std::vector<LayoutPass::SortKey> +LayoutPass::decorate(File::AtomRange<DefinedAtom> &atomRange) const { + std::vector<SortKey> ret; + for (OwningAtomPtr<DefinedAtom> &atom : atomRange.owning_ptrs()) { + auto ri = _followOnRoots.find(atom.get()); + auto oi = _ordinalOverrideMap.find(atom.get()); + const auto *root = (ri == _followOnRoots.end()) ? atom.get() : ri->second; + uint64_t override = (oi == _ordinalOverrideMap.end()) ? 0 : oi->second; + ret.push_back(SortKey(std::move(atom), root, override)); + } + return ret; +} + +void LayoutPass::undecorate(File::AtomRange<DefinedAtom> &atomRange, + std::vector<SortKey> &keys) const { + size_t i = 0; + for (SortKey &k : keys) + atomRange[i++] = std::move(k._atom); +} + +/// Perform the actual pass +llvm::Error LayoutPass::perform(SimpleFile &mergedFile) { + DEBUG(llvm::dbgs() << "******** Laying out atoms:\n"); + // sort the atoms + ScopedTask task(getDefaultDomain(), "LayoutPass"); + File::AtomRange<DefinedAtom> atomRange = mergedFile.defined(); + + // Build follow on tables + buildFollowOnTable(atomRange); + + // Check the structure of followon graph if running in debug mode. 
+ DEBUG(checkFollowonChain(atomRange)); + + // Build override maps + buildOrdinalOverrideMap(atomRange); + + DEBUG({ + llvm::dbgs() << "unsorted atoms:\n"; + printDefinedAtoms(atomRange); + }); + + std::vector<LayoutPass::SortKey> vec = decorate(atomRange); + parallel_sort(vec.begin(), vec.end(), + [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool { + return compareAtoms(l, r, _customSorter); + }); + DEBUG(checkTransitivity(vec, _customSorter)); + undecorate(atomRange, vec); + + DEBUG({ + llvm::dbgs() << "sorted atoms:\n"; + printDefinedAtoms(atomRange); + }); + + DEBUG(llvm::dbgs() << "******** Finished laying out atoms\n"); + return llvm::Error::success(); +} + +void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(llvm::make_unique<LayoutPass>( + ctx.registry(), [&](const DefinedAtom * left, const DefinedAtom * right, + bool & leftBeforeRight) ->bool { + return ctx.customAtomOrderer(left, right, leftBeforeRight); + })); +} + +} // namespace mach_o +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.h new file mode 100644 index 000000000000..c18777eded0a --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.h @@ -0,0 +1,119 @@ +//===------ lib/ReaderWriter/MachO/LayoutPass.h - Handles Layout of atoms -===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_LAYOUT_PASS_H +#define LLD_READER_WRITER_MACHO_LAYOUT_PASS_H + +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include <map> +#include <string> +#include <vector> + +namespace lld { +class DefinedAtom; +class SimpleFile; + +namespace mach_o { + +/// This linker pass does the layout of the atoms. The pass is done after the +/// order their .o files were found on the command line, then by order of the +/// atoms (address) in the .o file. But some atoms have a preferred location +/// in their section (such as pinned to the start or end of the section), so +/// the sort must take that into account too. +class LayoutPass : public Pass { +public: + struct SortKey { + SortKey(OwningAtomPtr<DefinedAtom> &&atom, + const DefinedAtom *root, uint64_t override) + : _atom(std::move(atom)), _root(root), _override(override) {} + OwningAtomPtr<DefinedAtom> _atom; + const DefinedAtom *_root; + uint64_t _override; + + // Note, these are only here to appease MSVC bots which didn't like + // the same methods being implemented/deleted in OwningAtomPtr. + SortKey(SortKey &&key) : _atom(std::move(key._atom)), _root(key._root), + _override(key._override) { + key._root = nullptr; + } + + SortKey &operator=(SortKey &&key) { + _atom = std::move(key._atom); + _root = key._root; + key._root = nullptr; + _override = key._override; + return *this; + } + + private: + SortKey(const SortKey &) = delete; + void operator=(const SortKey&) = delete; + }; + + typedef std::function<bool (const DefinedAtom *left, const DefinedAtom *right, + bool &leftBeforeRight)> SortOverride; + + LayoutPass(const Registry ®istry, SortOverride sorter); + + /// Sorts atoms in mergedFile by content type then by command line order. 
+ llvm::Error perform(SimpleFile &mergedFile) override; + + ~LayoutPass() override = default; + +private: + // Build the followOn atoms chain as specified by the kindLayoutAfter + // reference type + void buildFollowOnTable(const File::AtomRange<DefinedAtom> &range); + + // Build a map of Atoms to ordinals for sorting the atoms + void buildOrdinalOverrideMap(const File::AtomRange<DefinedAtom> &range); + + const Registry &_registry; + SortOverride _customSorter; + + typedef llvm::DenseMap<const DefinedAtom *, const DefinedAtom *> AtomToAtomT; + typedef llvm::DenseMap<const DefinedAtom *, uint64_t> AtomToOrdinalT; + + // A map to be used to sort atoms. It represents the order of atoms in the + // result; if Atom X is mapped to atom Y in this map, X will be located + // immediately before Y in the output file. Y might be mapped to another + // atom, constructing a follow-on chain. An atom cannot be mapped to more + // than one atom unless all but one atom are of size zero. + AtomToAtomT _followOnNexts; + + // A map to be used to sort atoms. It's a map from an atom to its root of + // follow-on chain. A root atom is mapped to itself. If an atom is not in + // _followOnNexts, the atom is not in this map, and vice versa. + AtomToAtomT _followOnRoots; + + AtomToOrdinalT _ordinalOverrideMap; + + // Helper methods for buildFollowOnTable(). + const DefinedAtom *findAtomFollowedBy(const DefinedAtom *targetAtom); + bool checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom); + + void setChainRoot(const DefinedAtom *targetAtom, const DefinedAtom *root); + + std::vector<SortKey> decorate(File::AtomRange<DefinedAtom> &atomRange) const; + + void undecorate(File::AtomRange<DefinedAtom> &atomRange, + std::vector<SortKey> &keys) const; + + // Check if the follow-on graph is a correct structure. For debugging only. 
+ void checkFollowonChain(const File::AtomRange<DefinedAtom> &range); +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_LAYOUT_PASS_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp new file mode 100644 index 000000000000..db4a96823e74 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp @@ -0,0 +1,1102 @@ +//===- lib/ReaderWriter/MachO/MachOLinkingContext.cpp ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "ArchHandler.h" +#include "File.h" +#include "FlatNamespaceFile.h" +#include "MachONormalizedFile.h" +#include "MachOPasses.h" +#include "SectCreateFile.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "lld/Driver/Driver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/Path.h" +#include <algorithm> + +using lld::mach_o::ArchHandler; +using lld::mach_o::MachOFile; +using lld::mach_o::MachODylibFile; +using namespace llvm::MachO; + +namespace lld { + +bool MachOLinkingContext::parsePackedVersion(StringRef str, uint32_t &result) { + result = 0; + + if (str.empty()) + return false; + + SmallVector<StringRef, 3> parts; + llvm::SplitString(str, parts, "."); + + unsigned long long num; + if (llvm::getAsUnsignedInteger(parts[0], 10, num)) + return true; + if (num > 65535) + return true; + 
result = num << 16; + + if (parts.size() > 1) { + if (llvm::getAsUnsignedInteger(parts[1], 10, num)) + return true; + if (num > 255) + return true; + result |= (num << 8); + } + + if (parts.size() > 2) { + if (llvm::getAsUnsignedInteger(parts[2], 10, num)) + return true; + if (num > 255) + return true; + result |= num; + } + + return false; +} + +bool MachOLinkingContext::parsePackedVersion(StringRef str, uint64_t &result) { + result = 0; + + if (str.empty()) + return false; + + SmallVector<StringRef, 5> parts; + llvm::SplitString(str, parts, "."); + + unsigned long long num; + if (llvm::getAsUnsignedInteger(parts[0], 10, num)) + return true; + if (num > 0xFFFFFF) + return true; + result = num << 40; + + unsigned Shift = 30; + for (StringRef str : llvm::makeArrayRef(parts).slice(1)) { + if (llvm::getAsUnsignedInteger(str, 10, num)) + return true; + if (num > 0x3FF) + return true; + result |= (num << Shift); + Shift -= 10; + } + + return false; +} + +MachOLinkingContext::ArchInfo MachOLinkingContext::_s_archInfos[] = { + { "x86_64", arch_x86_64, true, CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL }, + { "i386", arch_x86, true, CPU_TYPE_I386, CPU_SUBTYPE_X86_ALL }, + { "ppc", arch_ppc, false, CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL }, + { "armv6", arch_armv6, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6 }, + { "armv7", arch_armv7, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7 }, + { "armv7s", arch_armv7s, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S }, + { "arm64", arch_arm64, true, CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL }, + { "", arch_unknown,false, 0, 0 } +}; + +MachOLinkingContext::Arch +MachOLinkingContext::archFromCpuType(uint32_t cputype, uint32_t cpusubtype) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if ((info->cputype == cputype) && (info->cpusubtype == cpusubtype)) + return info->arch; + } + return arch_unknown; +} + +MachOLinkingContext::Arch +MachOLinkingContext::archFromName(StringRef archName) { + for (ArchInfo *info = _s_archInfos; 
!info->archName.empty(); ++info) { + if (info->archName.equals(archName)) + return info->arch; + } + return arch_unknown; +} + +StringRef MachOLinkingContext::nameFromArch(Arch arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->archName; + } + return "<unknown>"; +} + +uint32_t MachOLinkingContext::cpuTypeFromArch(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->cputype; + } + llvm_unreachable("Unknown arch type"); +} + +uint32_t MachOLinkingContext::cpuSubtypeFromArch(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->cpusubtype; + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::isThinObjectFile(StringRef path, Arch &arch) { + return mach_o::normalized::isThinObjectFile(path, arch); +} + +bool MachOLinkingContext::sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, + uint32_t &size) { + return mach_o::normalized::sliceFromFatFile(mb, _arch, offset, size); +} + +MachOLinkingContext::MachOLinkingContext() {} + +MachOLinkingContext::~MachOLinkingContext() { + // Atoms are allocated on BumpPtrAllocator's on File's. + // As we transfer atoms from one file to another, we need to clear all of the + // atoms before we remove any of the BumpPtrAllocator's. 
+ auto &nodes = getNodes(); + for (unsigned i = 0, e = nodes.size(); i != e; ++i) { + FileNode *node = dyn_cast<FileNode>(nodes[i].get()); + if (!node) + continue; + File *file = node->getFile(); + file->clearAtoms(); + } +} + +void MachOLinkingContext::configure(HeaderFileType type, Arch arch, OS os, + uint32_t minOSVersion, + bool exportDynamicSymbols) { + _outputMachOType = type; + _arch = arch; + _os = os; + _osMinVersion = minOSVersion; + + // If min OS not specified on command line, use reasonable defaults. + // Note that we only do sensible defaults when emitting something other than + // object and preload. + if (_outputMachOType != llvm::MachO::MH_OBJECT && + _outputMachOType != llvm::MachO::MH_PRELOAD) { + if (minOSVersion == 0) { + switch (_arch) { + case arch_x86_64: + case arch_x86: + parsePackedVersion("10.8", _osMinVersion); + _os = MachOLinkingContext::OS::macOSX; + break; + case arch_armv6: + case arch_armv7: + case arch_armv7s: + case arch_arm64: + parsePackedVersion("7.0", _osMinVersion); + _os = MachOLinkingContext::OS::iOS; + break; + default: + break; + } + } + } + + switch (_outputMachOType) { + case llvm::MachO::MH_EXECUTE: + // If targeting newer OS, use _main + if (minOS("10.8", "6.0")) { + _entrySymbolName = "_main"; + } else { + // If targeting older OS, use start (in crt1.o) + _entrySymbolName = "start"; + } + + // __PAGEZERO defaults to 4GB on 64-bit (except for PP64 which lld does not + // support) and 4KB on 32-bit. + if (is64Bit(_arch)) { + _pageZeroSize = 0x100000000; + } else { + _pageZeroSize = 0x1000; + } + + // Initial base address is __PAGEZERO size. + _baseAddress = _pageZeroSize; + + // Make PIE by default when targetting newer OSs. 
+ switch (os) { + case OS::macOSX: + if (minOSVersion >= 0x000A0700) // MacOSX 10.7 + _pie = true; + break; + case OS::iOS: + if (minOSVersion >= 0x00040300) // iOS 4.3 + _pie = true; + break; + case OS::iOS_simulator: + _pie = true; + break; + case OS::unknown: + break; + } + setGlobalsAreDeadStripRoots(exportDynamicSymbols); + break; + case llvm::MachO::MH_DYLIB: + setGlobalsAreDeadStripRoots(exportDynamicSymbols); + break; + case llvm::MachO::MH_BUNDLE: + break; + case llvm::MachO::MH_OBJECT: + _printRemainingUndefines = false; + _allowRemainingUndefines = true; + default: + break; + } + + // Set default segment page sizes based on arch. + if (arch == arch_arm64) + _pageSize = 4*4096; +} + +uint32_t MachOLinkingContext::getCPUType() const { + return cpuTypeFromArch(_arch); +} + +uint32_t MachOLinkingContext::getCPUSubType() const { + return cpuSubtypeFromArch(_arch); +} + +bool MachOLinkingContext::is64Bit(Arch arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->cputype & CPU_ARCH_ABI64); + } + } + // unknown archs are not 64-bit. + return false; +} + +bool MachOLinkingContext::isHostEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->littleEndian == llvm::sys::IsLittleEndianHost); + } + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::isBigEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return ! 
info->littleEndian; + } + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::is64Bit() const { + return is64Bit(_arch); +} + +bool MachOLinkingContext::outputTypeHasEntry() const { + switch (_outputMachOType) { + case MH_EXECUTE: + case MH_DYLINKER: + case MH_PRELOAD: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsStubsPass() const { + switch (_outputMachOType) { + case MH_EXECUTE: + return !_outputMachOTypeStatic; + case MH_DYLIB: + case MH_BUNDLE: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsGOTPass() const { + // GOT pass not used in -r mode. + if (_outputMachOType == MH_OBJECT) + return false; + // Only some arches use GOT pass. + switch (_arch) { + case arch_x86_64: + case arch_arm64: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsCompactUnwindPass() const { + switch (_outputMachOType) { + case MH_EXECUTE: + case MH_DYLIB: + case MH_BUNDLE: + return archHandler().needsCompactUnwind(); + default: + return false; + } +} + +bool MachOLinkingContext::needsObjCPass() const { + // ObjC pass is only needed if any of the inputs were ObjC. + return _objcConstraint != objc_unknown; +} + +bool MachOLinkingContext::needsShimPass() const { + // Shim pass only used in final executables. + if (_outputMachOType == MH_OBJECT) + return false; + // Only 32-bit arm arches use Shim pass. 
+ switch (_arch) { + case arch_armv6: + case arch_armv7: + case arch_armv7s: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsTLVPass() const { + switch (_outputMachOType) { + case MH_BUNDLE: + case MH_EXECUTE: + case MH_DYLIB: + return true; + default: + return false; + } +} + +StringRef MachOLinkingContext::binderSymbolName() const { + return archHandler().stubInfo().binderSymbolName; +} + +bool MachOLinkingContext::minOS(StringRef mac, StringRef iOS) const { + uint32_t parsedVersion; + switch (_os) { + case OS::macOSX: + if (parsePackedVersion(mac, parsedVersion)) + return false; + return _osMinVersion >= parsedVersion; + case OS::iOS: + case OS::iOS_simulator: + if (parsePackedVersion(iOS, parsedVersion)) + return false; + return _osMinVersion >= parsedVersion; + case OS::unknown: + // If we don't know the target, then assume that we don't meet the min OS. + // This matches the ld64 behaviour + return false; + } + llvm_unreachable("invalid OS enum"); +} + +bool MachOLinkingContext::addEntryPointLoadCommand() const { + if ((_outputMachOType == MH_EXECUTE) && !_outputMachOTypeStatic) { + return minOS("10.8", "6.0"); + } + return false; +} + +bool MachOLinkingContext::addUnixThreadLoadCommand() const { + switch (_outputMachOType) { + case MH_EXECUTE: + if (_outputMachOTypeStatic) + return true; + else + return !minOS("10.8", "6.0"); + break; + case MH_DYLINKER: + case MH_PRELOAD: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::pathExists(StringRef path) const { + if (!_testingFileUsage) + return llvm::sys::fs::exists(path.str()); + + // Otherwise, we're in test mode: only files explicitly provided on the + // command-line exist. + std::string key = path.str(); + std::replace(key.begin(), key.end(), '\\', '/'); + return _existingPaths.find(key) != _existingPaths.end(); +} + +bool MachOLinkingContext::fileExists(StringRef path) const { + bool found = pathExists(path); + // Log search misses. 
+ if (!found) + addInputFileNotFound(path); + + // When testing, file is never opened, so logging is done here. + if (_testingFileUsage && found) + addInputFileDependency(path); + + return found; +} + +void MachOLinkingContext::setSysLibRoots(const StringRefVector &paths) { + _syslibRoots = paths; +} + +void MachOLinkingContext::addRpath(StringRef rpath) { + _rpaths.push_back(rpath); +} + +void MachOLinkingContext::addModifiedSearchDir(StringRef libPath, + bool isSystemPath) { + bool addedModifiedPath = false; + + // -syslibroot only applies to absolute paths. + if (libPath.startswith("/")) { + for (auto syslibRoot : _syslibRoots) { + SmallString<256> path(syslibRoot); + llvm::sys::path::append(path, libPath); + if (pathExists(path)) { + _searchDirs.push_back(path.str().copy(_allocator)); + addedModifiedPath = true; + } + } + } + + if (addedModifiedPath) + return; + + // Finally, if only one -syslibroot is given, system paths which aren't in it + // get suppressed. + if (_syslibRoots.size() != 1 || !isSystemPath) { + if (pathExists(libPath)) { + _searchDirs.push_back(libPath); + } + } +} + +void MachOLinkingContext::addFrameworkSearchDir(StringRef fwPath, + bool isSystemPath) { + bool pathAdded = false; + + // -syslibroot only used with to absolute framework search paths. + if (fwPath.startswith("/")) { + for (auto syslibRoot : _syslibRoots) { + SmallString<256> path(syslibRoot); + llvm::sys::path::append(path, fwPath); + if (pathExists(path)) { + _frameworkDirs.push_back(path.str().copy(_allocator)); + pathAdded = true; + } + } + } + // If fwPath found in any -syslibroot, then done. + if (pathAdded) + return; + + // If only one -syslibroot, system paths not in that SDK are suppressed. + if (isSystemPath && (_syslibRoots.size() == 1)) + return; + + // Only use raw fwPath if that directory exists. 
+ if (pathExists(fwPath)) + _frameworkDirs.push_back(fwPath); +} + +llvm::Optional<StringRef> +MachOLinkingContext::searchDirForLibrary(StringRef path, + StringRef libName) const { + SmallString<256> fullPath; + if (libName.endswith(".o")) { + // A request ending in .o is special: just search for the file directly. + fullPath.assign(path); + llvm::sys::path::append(fullPath, libName); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + return llvm::None; + } + + // Search for dynamic library + fullPath.assign(path); + llvm::sys::path::append(fullPath, Twine("lib") + libName + ".dylib"); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + + // If not, try for a static library + fullPath.assign(path); + llvm::sys::path::append(fullPath, Twine("lib") + libName + ".a"); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + + return llvm::None; +} + +llvm::Optional<StringRef> +MachOLinkingContext::searchLibrary(StringRef libName) const { + SmallString<256> path; + for (StringRef dir : searchDirs()) { + llvm::Optional<StringRef> searchDir = searchDirForLibrary(dir, libName); + if (searchDir) + return searchDir; + } + + return llvm::None; +} + +llvm::Optional<StringRef> +MachOLinkingContext::findPathForFramework(StringRef fwName) const{ + SmallString<256> fullPath; + for (StringRef dir : frameworkDirs()) { + fullPath.assign(dir); + llvm::sys::path::append(fullPath, Twine(fwName) + ".framework", fwName); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + } + + return llvm::None; +} + +bool MachOLinkingContext::validateImpl(raw_ostream &diagnostics) { + // TODO: if -arch not specified, look at arch of first .o file. 
+ + if (_currentVersion && _outputMachOType != MH_DYLIB) { + diagnostics << "error: -current_version can only be used with dylibs\n"; + return false; + } + + if (_compatibilityVersion && _outputMachOType != MH_DYLIB) { + diagnostics + << "error: -compatibility_version can only be used with dylibs\n"; + return false; + } + + if (_deadStrippableDylib && _outputMachOType != MH_DYLIB) { + diagnostics + << "error: -mark_dead_strippable_dylib can only be used with dylibs.\n"; + return false; + } + + if (!_bundleLoader.empty() && outputMachOType() != MH_BUNDLE) { + diagnostics + << "error: -bundle_loader can only be used with Mach-O bundles\n"; + return false; + } + + // If -exported_symbols_list used, all exported symbols must be defined. + if (_exportMode == ExportMode::whiteList) { + for (const auto &symbol : _exportedSymbols) + addInitialUndefinedSymbol(symbol.getKey()); + } + + // If -dead_strip, set up initial live symbols. + if (deadStrip()) { + // Entry point is live. + if (outputTypeHasEntry()) + addDeadStripRoot(entrySymbolName()); + // Lazy binding helper is live. + if (needsStubsPass()) + addDeadStripRoot(binderSymbolName()); + // If using -exported_symbols_list, make all exported symbols live. + if (_exportMode == ExportMode::whiteList) { + setGlobalsAreDeadStripRoots(false); + for (const auto &symbol : _exportedSymbols) + addDeadStripRoot(symbol.getKey()); + } + } + + addOutputFileDependency(outputPath()); + + return true; +} + +void MachOLinkingContext::addPasses(PassManager &pm) { + // objc pass should be before layout pass. Otherwise test cases may contain + // no atoms which confuses the layout pass. 
+ if (needsObjCPass()) + mach_o::addObjCPass(pm, *this); + mach_o::addLayoutPass(pm, *this); + if (needsStubsPass()) + mach_o::addStubsPass(pm, *this); + if (needsCompactUnwindPass()) + mach_o::addCompactUnwindPass(pm, *this); + if (needsGOTPass()) + mach_o::addGOTPass(pm, *this); + if (needsTLVPass()) + mach_o::addTLVPass(pm, *this); + if (needsShimPass()) + mach_o::addShimPass(pm, *this); // Shim pass must run after stubs pass. +} + +Writer &MachOLinkingContext::writer() const { + if (!_writer) + _writer = createWriterMachO(*this); + return *_writer; +} + +ErrorOr<std::unique_ptr<MemoryBuffer>> +MachOLinkingContext::getMemoryBuffer(StringRef path) { + addInputFileDependency(path); + + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = + MemoryBuffer::getFileOrSTDIN(path); + if (std::error_code ec = mbOrErr.getError()) + return ec; + std::unique_ptr<MemoryBuffer> mb = std::move(mbOrErr.get()); + + // If buffer contains a fat file, find required arch in fat buffer + // and switch buffer to point to just that required slice. + uint32_t offset; + uint32_t size; + if (sliceFromFatFile(mb->getMemBufferRef(), offset, size)) + return MemoryBuffer::getFileSlice(path, size, offset); + return std::move(mb); +} + +MachODylibFile* MachOLinkingContext::loadIndirectDylib(StringRef path) { + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = getMemoryBuffer(path); + if (mbOrErr.getError()) + return nullptr; + + ErrorOr<std::unique_ptr<File>> fileOrErr = + registry().loadFile(std::move(mbOrErr.get())); + if (!fileOrErr) + return nullptr; + std::unique_ptr<File> &file = fileOrErr.get(); + file->parse(); + MachODylibFile *result = reinterpret_cast<MachODylibFile *>(file.get()); + // Node object now owned by _indirectDylibs vector. + _indirectDylibs.push_back(std::move(file)); + return result; +} + +MachODylibFile* MachOLinkingContext::findIndirectDylib(StringRef path) { + // See if already loaded. 
+ auto pos = _pathToDylibMap.find(path); + if (pos != _pathToDylibMap.end()) + return pos->second; + + // Search -L paths if of the form "libXXX.dylib" + std::pair<StringRef, StringRef> split = path.rsplit('/'); + StringRef leafName = split.second; + if (leafName.startswith("lib") && leafName.endswith(".dylib")) { + // FIXME: Need to enhance searchLibrary() to only look for .dylib + auto libPath = searchLibrary(leafName); + if (libPath) + return loadIndirectDylib(libPath.getValue()); + } + + // Try full path with sysroot. + for (StringRef sysPath : _syslibRoots) { + SmallString<256> fullPath; + fullPath.assign(sysPath); + llvm::sys::path::append(fullPath, path); + if (pathExists(fullPath)) + return loadIndirectDylib(fullPath); + } + + // Try full path. + if (pathExists(path)) { + return loadIndirectDylib(path); + } + + return nullptr; +} + +uint32_t MachOLinkingContext::dylibCurrentVersion(StringRef installName) const { + auto pos = _pathToDylibMap.find(installName); + if (pos != _pathToDylibMap.end()) + return pos->second->currentVersion(); + else + return 0x10000; // 1.0 +} + +uint32_t MachOLinkingContext::dylibCompatVersion(StringRef installName) const { + auto pos = _pathToDylibMap.find(installName); + if (pos != _pathToDylibMap.end()) + return pos->second->compatVersion(); + else + return 0x10000; // 1.0 +} + +void MachOLinkingContext::createImplicitFiles( + std::vector<std::unique_ptr<File> > &result) { + // Add indirect dylibs by asking each linked dylib to add its indirects. + // Iterate until no more dylibs get loaded. + size_t dylibCount = 0; + while (dylibCount != _allDylibs.size()) { + dylibCount = _allDylibs.size(); + for (MachODylibFile *dylib : _allDylibs) { + dylib->loadReExportedDylibs([this] (StringRef path) -> MachODylibFile* { + return findIndirectDylib(path); }); + } + } + + // Let writer add output type specific extras. + writer().createImplicitFiles(result); + + // If undefinedMode is != error, add a FlatNamespaceFile instance. 
This will + // provide a SharedLibraryAtom for symbols that aren't defined elsewhere. + if (undefinedMode() != UndefinedMode::error) { + result.emplace_back(new mach_o::FlatNamespaceFile(*this)); + _flatNamespaceFile = result.back().get(); + } +} + +void MachOLinkingContext::registerDylib(MachODylibFile *dylib, + bool upward) const { + std::lock_guard<std::mutex> lock(_dylibsMutex); + + if (std::find(_allDylibs.begin(), + _allDylibs.end(), dylib) == _allDylibs.end()) + _allDylibs.push_back(dylib); + _pathToDylibMap[dylib->installName()] = dylib; + // If path is different than install name, register path too. + if (!dylib->path().equals(dylib->installName())) + _pathToDylibMap[dylib->path()] = dylib; + if (upward) + _upwardDylibs.insert(dylib); +} + +bool MachOLinkingContext::isUpwardDylib(StringRef installName) const { + for (MachODylibFile *dylib : _upwardDylibs) { + if (dylib->installName().equals(installName)) + return true; + } + return false; +} + +ArchHandler &MachOLinkingContext::archHandler() const { + if (!_archHandler) + _archHandler = ArchHandler::create(_arch); + return *_archHandler; +} + +void MachOLinkingContext::addSectionAlignment(StringRef seg, StringRef sect, + uint16_t align) { + SectionAlign entry = { seg, sect, align }; + _sectAligns.push_back(entry); +} + +void MachOLinkingContext::addSectCreateSection( + StringRef seg, StringRef sect, + std::unique_ptr<MemoryBuffer> content) { + + if (!_sectCreateFile) { + auto sectCreateFile = llvm::make_unique<mach_o::SectCreateFile>(); + _sectCreateFile = sectCreateFile.get(); + getNodes().push_back(llvm::make_unique<FileNode>(std::move(sectCreateFile))); + } + + assert(_sectCreateFile && "sectcreate file does not exist."); + _sectCreateFile->addSection(seg, sect, std::move(content)); +} + +bool MachOLinkingContext::sectionAligned(StringRef seg, StringRef sect, + uint16_t &align) const { + for (const SectionAlign &entry : _sectAligns) { + if (seg.equals(entry.segmentName) && 
sect.equals(entry.sectionName)) { + align = entry.align; + return true; + } + } + return false; +} + +void MachOLinkingContext::addExportSymbol(StringRef sym) { + // Support old crufty export lists with bogus entries. + if (sym.endswith(".eh") || sym.startswith(".objc_category_name_")) { + llvm::errs() << "warning: ignoring " << sym << " in export list\n"; + return; + } + // Only i386 MacOSX uses old ABI, so don't change those. + if ((_os != OS::macOSX) || (_arch != arch_x86)) { + // ObjC has two differnent ABIs. Be nice and allow one export list work for + // both ABIs by renaming symbols. + if (sym.startswith(".objc_class_name_")) { + std::string abi2className("_OBJC_CLASS_$_"); + abi2className += sym.substr(17); + _exportedSymbols.insert(copy(abi2className)); + std::string abi2metaclassName("_OBJC_METACLASS_$_"); + abi2metaclassName += sym.substr(17); + _exportedSymbols.insert(copy(abi2metaclassName)); + return; + } + } + + // FIXME: Support wildcards. + _exportedSymbols.insert(sym); +} + +bool MachOLinkingContext::exportSymbolNamed(StringRef sym) const { + switch (_exportMode) { + case ExportMode::globals: + llvm_unreachable("exportSymbolNamed() should not be called in this mode"); + break; + case ExportMode::whiteList: + return _exportedSymbols.count(sym); + case ExportMode::blackList: + return !_exportedSymbols.count(sym); + } + llvm_unreachable("_exportMode unknown enum value"); +} + +std::string MachOLinkingContext::demangle(StringRef symbolName) const { + // Only try to demangle symbols if -demangle on command line + if (!demangleSymbols()) + return symbolName; + + // Only try to demangle symbols that look like C++ symbols + if (!symbolName.startswith("__Z")) + return symbolName; + + SmallString<256> symBuff; + StringRef nullTermSym = Twine(symbolName).toNullTerminatedStringRef(symBuff); + // Mach-O has extra leading underscore that needs to be removed. 
+ const char *cstr = nullTermSym.data() + 1; + int status; + char *demangled = llvm::itaniumDemangle(cstr, nullptr, nullptr, &status); + if (demangled) { + std::string result(demangled); + // __cxa_demangle() always uses a malloc'ed buffer to return the result. + free(demangled); + return result; + } + + return symbolName; +} + +static void addDependencyInfoHelper(llvm::raw_fd_ostream *DepInfo, + char Opcode, StringRef Path) { + if (!DepInfo) + return; + + *DepInfo << Opcode; + *DepInfo << Path; + *DepInfo << '\0'; +} + +std::error_code MachOLinkingContext::createDependencyFile(StringRef path) { + std::error_code ec; + _dependencyInfo = std::unique_ptr<llvm::raw_fd_ostream>(new + llvm::raw_fd_ostream(path, ec, llvm::sys::fs::F_None)); + if (ec) { + _dependencyInfo.reset(); + return ec; + } + + addDependencyInfoHelper(_dependencyInfo.get(), 0x00, "lld" /*FIXME*/); + return std::error_code(); +} + +void MachOLinkingContext::addInputFileDependency(StringRef path) const { + addDependencyInfoHelper(_dependencyInfo.get(), 0x10, path); +} + +void MachOLinkingContext::addInputFileNotFound(StringRef path) const { + addDependencyInfoHelper(_dependencyInfo.get(), 0x11, path); +} + +void MachOLinkingContext::addOutputFileDependency(StringRef path) const { + addDependencyInfoHelper(_dependencyInfo.get(), 0x40, path); +} + +void MachOLinkingContext::appendOrderedSymbol(StringRef symbol, + StringRef filename) { + // To support sorting static functions which may have the same name in + // multiple .o files, _orderFiles maps the symbol name to a vector + // of OrderFileNode each of which can specify a file prefix. 
+ OrderFileNode info; + if (!filename.empty()) + info.fileFilter = copy(filename); + info.order = _orderFileEntries++; + _orderFiles[symbol].push_back(info); +} + +bool +MachOLinkingContext::findOrderOrdinal(const std::vector<OrderFileNode> &nodes, + const DefinedAtom *atom, + unsigned &ordinal) { + const File *objFile = &atom->file(); + assert(objFile); + StringRef objName = objFile->path(); + std::pair<StringRef, StringRef> dirAndLeaf = objName.rsplit('/'); + if (!dirAndLeaf.second.empty()) + objName = dirAndLeaf.second; + for (const OrderFileNode &info : nodes) { + if (info.fileFilter.empty()) { + // Have unprefixed symbol name in order file that matches this atom. + ordinal = info.order; + return true; + } + if (info.fileFilter.equals(objName)) { + // Have prefixed symbol name in order file that matches atom's path. + ordinal = info.order; + return true; + } + } + return false; +} + +bool MachOLinkingContext::customAtomOrderer(const DefinedAtom *left, + const DefinedAtom *right, + bool &leftBeforeRight) const { + // No custom sorting if no order file entries. + if (!_orderFileEntries) + return false; + + // Order files can only order named atoms. + StringRef leftName = left->name(); + StringRef rightName = right->name(); + if (leftName.empty() || rightName.empty()) + return false; + + // If neither is in order file list, no custom sorter. + auto leftPos = _orderFiles.find(leftName); + auto rightPos = _orderFiles.find(rightName); + bool leftIsOrdered = (leftPos != _orderFiles.end()); + bool rightIsOrdered = (rightPos != _orderFiles.end()); + if (!leftIsOrdered && !rightIsOrdered) + return false; + + // There could be multiple symbols with same name but different file prefixes. 
+ unsigned leftOrder; + unsigned rightOrder; + bool foundLeft = + leftIsOrdered && findOrderOrdinal(leftPos->getValue(), left, leftOrder); + bool foundRight = rightIsOrdered && + findOrderOrdinal(rightPos->getValue(), right, rightOrder); + if (!foundLeft && !foundRight) + return false; + + // If only one is in order file list, ordered one goes first. + if (foundLeft != foundRight) + leftBeforeRight = foundLeft; + else + leftBeforeRight = (leftOrder < rightOrder); + + return true; +} + +static bool isLibrary(const std::unique_ptr<Node> &elem) { + if (FileNode *node = dyn_cast<FileNode>(const_cast<Node *>(elem.get()))) { + File *file = node->getFile(); + return isa<SharedLibraryFile>(file) || isa<ArchiveLibraryFile>(file); + } + return false; +} + +// The darwin linker processes input files in two phases. The first phase +// links in all object (.o) files in command line order. The second phase +// links in libraries in command line order. +// In this function we reorder the input files so that all the object files +// comes before any library file. We also make a group for the library files +// so that the Resolver will reiterate over the libraries as long as we find +// new undefines from libraries. +void MachOLinkingContext::finalizeInputFiles() { + std::vector<std::unique_ptr<Node>> &elements = getNodes(); + std::stable_sort(elements.begin(), elements.end(), + [](const std::unique_ptr<Node> &a, + const std::unique_ptr<Node> &b) { + return !isLibrary(a) && isLibrary(b); + }); + size_t numLibs = std::count_if(elements.begin(), elements.end(), isLibrary); + elements.push_back(llvm::make_unique<GroupEnd>(numLibs)); +} + +llvm::Error MachOLinkingContext::handleLoadedFile(File &file) { + auto *machoFile = dyn_cast<MachOFile>(&file); + if (!machoFile) + return llvm::Error::success(); + + // Check that the arch of the context matches that of the file. + // Also set the arch of the context if it didn't have one. 
+ if (_arch == arch_unknown) { + _arch = machoFile->arch(); + } else if (machoFile->arch() != arch_unknown && machoFile->arch() != _arch) { + // Archs are different. + return llvm::make_error<GenericError>(file.path() + + Twine(" cannot be linked due to incompatible architecture")); + } + + // Check that the OS of the context matches that of the file. + // Also set the OS of the context if it didn't have one. + if (_os == OS::unknown) { + _os = machoFile->OS(); + } else if (machoFile->OS() != OS::unknown && machoFile->OS() != _os) { + // OSes are different. + return llvm::make_error<GenericError>(file.path() + + Twine(" cannot be linked due to incompatible operating systems")); + } + + // Check that if the objc info exists, that it is compatible with the target + // OS. + switch (machoFile->objcConstraint()) { + case objc_unknown: + // The file is not compiled with objc, so skip the checks. + break; + case objc_gc_only: + case objc_supports_gc: + llvm_unreachable("GC support should already have thrown an error"); + case objc_retainReleaseForSimulator: + // The file is built with simulator objc, so make sure that the context + // is also building with simulator support. + if (_os != OS::iOS_simulator) + return llvm::make_error<GenericError>(file.path() + + Twine(" cannot be linked. It contains ObjC built for the simulator" + " while we are linking a non-simulator target")); + assert((_objcConstraint == objc_unknown || + _objcConstraint == objc_retainReleaseForSimulator) && + "Must be linking with retain/release for the simulator"); + _objcConstraint = objc_retainReleaseForSimulator; + break; + case objc_retainRelease: + // The file is built without simulator objc, so make sure that the + // context is also building without simulator support. + if (_os == OS::iOS_simulator) + return llvm::make_error<GenericError>(file.path() + + Twine(" cannot be linked. 
It contains ObjC built for a non-simulator" + " target while we are linking a simulator target")); + assert((_objcConstraint == objc_unknown || + _objcConstraint == objc_retainRelease) && + "Must be linking with retain/release for a non-simulator target"); + _objcConstraint = objc_retainRelease; + break; + } + + // Check that the swift version of the context matches that of the file. + // Also set the swift version of the context if it didn't have one. + if (!_swiftVersion) { + _swiftVersion = machoFile->swiftVersion(); + } else if (machoFile->swiftVersion() && + machoFile->swiftVersion() != _swiftVersion) { + // Swift versions are different. + return llvm::make_error<GenericError>("different swift versions"); + } + + return llvm::Error::success(); +} + +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h new file mode 100644 index 000000000000..60d76d4b5c9b --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h @@ -0,0 +1,345 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file These data structures comprise the "normalized" view of +/// mach-o object files. The normalized view is an in-memory only data structure +/// which is always in native endianness and pointer size. +/// +/// The normalized view easily converts to and from YAML using YAML I/O. +/// +/// The normalized view converts to and from binary mach-o object files using +/// the writeBinary() and readBinary() functions. +/// +/// The normalized view converts to and from lld::Atoms using the +/// normalizedToAtoms() and normalizedFromAtoms(). 
+/// +/// Overall, the conversion paths available look like: +/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// ^ +/// | +/// v +/// +------------+ +------+ +/// | normalized | <-> | yaml | +/// +------------+ +------+ +/// ^ +/// | +/// v +/// +-------+ +/// | Atoms | +/// +-------+ +/// + +#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H +#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H + +#include "DebugInfo.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/YAMLTraits.h" + +using llvm::BumpPtrAllocator; +using llvm::yaml::Hex64; +using llvm::yaml::Hex32; +using llvm::yaml::Hex16; +using llvm::yaml::Hex8; +using llvm::yaml::SequenceTraits; +using llvm::MachO::HeaderFileType; +using llvm::MachO::BindType; +using llvm::MachO::RebaseType; +using llvm::MachO::NListType; +using llvm::MachO::RelocationInfoType; +using llvm::MachO::SectionType; +using llvm::MachO::LoadCommandType; +using llvm::MachO::ExportSymbolKind; +using llvm::MachO::DataRegionType; + +namespace lld { +namespace mach_o { +namespace normalized { + + +/// The real mach-o relocation record is 8-bytes on disk and is +/// encoded in one of two different bit-field patterns. This +/// normalized form has the union of all possible fields. +struct Relocation { + Relocation() : offset(0), scattered(false), + type(llvm::MachO::GENERIC_RELOC_VANILLA), + length(0), pcRel(false), isExtern(false), value(0), + symbol(0) { } + + Hex32 offset; + bool scattered; + RelocationInfoType type; + uint8_t length; + bool pcRel; + bool isExtern; + Hex32 value; + uint32_t symbol; +}; + +/// A typedef so that YAML I/O can treat this vector as a sequence. 
+typedef std::vector<Relocation> Relocations; + +/// A typedef so that YAML I/O can process the raw bytes in a section. +typedef std::vector<Hex8> ContentBytes; + +/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence. +typedef std::vector<uint32_t> IndirectSymbols; + +/// A typedef so that YAML I/O can encode/decode section attributes. +LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr) + +/// A typedef so that YAML I/O can encode/decode section alignment. +LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment) + +/// Mach-O has a 32-bit and 64-bit section record. This normalized form +/// can support either kind. +struct Section { + Section() : type(llvm::MachO::S_REGULAR), + attributes(0), alignment(1), address(0) { } + + StringRef segmentName; + StringRef sectionName; + SectionType type; + SectionAttr attributes; + SectionAlignment alignment; + Hex64 address; + ArrayRef<uint8_t> content; + Relocations relocations; + IndirectSymbols indirectSymbols; + +#ifndef NDEBUG + raw_ostream& operator<<(raw_ostream &OS) const { + dump(OS); + return OS; + } + + void dump(raw_ostream &OS = llvm::dbgs()) const; +#endif +}; + + +/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist. +LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope) + +/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist. +LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc) + +/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol +/// type and scope are mixed in the same n_type field. This normalized form +/// works for any pointer size and separates out the type and scope. +struct Symbol { + Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { } + + StringRef name; + NListType type; + SymbolScope scope; + uint8_t sect; + SymbolDesc desc; + Hex64 value; +}; + +/// Check whether the given section type indicates a zero-filled section.
+// FIXME: Utility functions of this kind should probably be moved into +// llvm/Support. +inline bool isZeroFillSection(SectionType T) { + return (T == llvm::MachO::S_ZEROFILL || + T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL); +} + +/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment. +LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect) + +/// A typedef to hold versions X.Y.Z packed into 32-bit xxxx.yy.zz +LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion) + +/// Segments are only used in normalized final linked images (not in relocatable +/// object files). They specify how a range of the file is loaded. +struct Segment { + StringRef name; + Hex64 address; + Hex64 size; + VMProtect init_access; + VMProtect max_access; +}; + +/// Only used in normalized final linked images to specify on which dylibs +/// it depends. +struct DependentDylib { + StringRef path; + LoadCommandType kind; + PackedVersion compatVersion; + PackedVersion currentVersion; +}; + +/// A normalized rebasing entry. Only used in normalized final linked images. +struct RebaseLocation { + Hex32 segOffset; + uint8_t segIndex; + RebaseType kind; +}; + +/// A normalized binding entry. Only used in normalized final linked images. +struct BindLocation { + Hex32 segOffset; + uint8_t segIndex; + BindType kind; + bool canBeNull; + int ordinal; + StringRef symbolName; + Hex64 addend; +}; + +/// A typedef so that YAML I/O can encode/decode export flags. +LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags) + +/// A normalized export entry. Only used in normalized final linked images. +struct Export { + StringRef name; + Hex64 offset; + ExportSymbolKind kind; + ExportFlags flags; + Hex32 otherOffset; + StringRef otherName; +}; + +/// A normalized data-in-code entry. +struct DataInCode { + Hex32 offset; + Hex16 length; + DataRegionType kind; +}; + +/// A typedef so that YAML I/O can encode/decode mach_header.flags.
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags) + +/// +struct NormalizedFile { + MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; + HeaderFileType fileType = llvm::MachO::MH_OBJECT; + FileFlags flags = 0; + std::vector<Segment> segments; // Not used in object files. + std::vector<Section> sections; + + // Symbols sorted by kind. + std::vector<Symbol> localSymbols; + std::vector<Symbol> globalSymbols; + std::vector<Symbol> undefinedSymbols; + std::vector<Symbol> stabsSymbols; + + // Maps to load commands with no LINKEDIT content (final linked images only). + std::vector<DependentDylib> dependentDylibs; + StringRef installName; // dylibs only + PackedVersion compatVersion = 0; // dylibs only + PackedVersion currentVersion = 0; // dylibs only + bool hasUUID = false; + bool hasMinVersionLoadCommand = false; + bool generateDataInCodeLoadCommand = false; + std::vector<StringRef> rpaths; + Hex64 entryAddress = 0; + Hex64 stackSize = 0; + MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown; + Hex64 sourceVersion = 0; + PackedVersion minOSverson = 0; + PackedVersion sdkVersion = 0; + LoadCommandType minOSVersionKind = (LoadCommandType)0; + + // Maps to load commands with LINKEDIT content (final linked images only). + Hex32 pageSize = 0; + std::vector<RebaseLocation> rebasingInfo; + std::vector<BindLocation> bindingInfo; + std::vector<BindLocation> weakBindingInfo; + std::vector<BindLocation> lazyBindingInfo; + std::vector<Export> exportInfo; + std::vector<uint8_t> functionStarts; + std::vector<DataInCode> dataInCode; + + // TODO: + // code-signature + // split-seg-info + // function-starts + + // For any allocations in this struct which need to be owned by this struct. + BumpPtrAllocator ownedAllocations; +}; + +/// Tests if a file is a non-fat mach-o object file. 
+bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch); + +/// If the buffer is a fat file with the requested arch, then this function +/// returns true with 'offset' and 'size' set to location of the arch slice +/// within the buffer. Otherwise returns false. +bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch, + uint32_t &offset, uint32_t &size); + +/// Reads a mach-o file and produces an in-memory normalized view. +llvm::Expected<std::unique_ptr<NormalizedFile>> +readBinary(std::unique_ptr<MemoryBuffer> &mb, + const MachOLinkingContext::Arch arch); + +/// Takes in-memory normalized view and writes a mach-o object file. +llvm::Error writeBinary(const NormalizedFile &file, StringRef path); + +size_t headerAndLoadCommandsSize(const NormalizedFile &file); + + +/// Parses a yaml encoded mach-o file to produce an in-memory normalized view. +llvm::Expected<std::unique_ptr<NormalizedFile>> +readYaml(std::unique_ptr<MemoryBuffer> &mb); + +/// Writes a yaml encoded mach-o file given an in-memory normalized view. +std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out); + +llvm::Error +normalizedObjectToAtoms(MachOFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs); + +llvm::Error +normalizedDylibToAtoms(MachODylibFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs); + +/// Takes in-memory normalized dylib or object and parses it into lld::File +llvm::Expected<std::unique_ptr<lld::File>> +normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs); + +/// Takes atoms and generates a normalized mach-o view.
+llvm::Expected<std::unique_ptr<NormalizedFile>> +normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt); + + +} // namespace normalized + +/// Class for interfacing mach-o yaml files into generic yaml parsing +class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { +public: + MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch) + : _arch(arch) { } + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override; +private: + const MachOLinkingContext::Arch _arch; +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp new file mode 100644 index 000000000000..23c7ea17f7e7 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp @@ -0,0 +1,590 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation converts from +/// mach-o on-disk binary format to in-memory normalized mach-o. 
+/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// | +/// | +/// v +/// +------------+ +/// | normalized | +/// +------------+ + +#include "MachONormalizedFile.h" +#include "ArchHandler.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/SharedLibraryFile.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <functional> +#include <system_error> + +using namespace llvm::MachO; +using llvm::object::ExportEntry; +using llvm::object::MachOObjectFile; + +namespace lld { +namespace mach_o { +namespace normalized { + +// Utility to call a lambda expression on each load command. 
+static llvm::Error forEachLoadCommand( + StringRef lcRange, unsigned lcCount, bool isBig, bool is64, + std::function<bool(uint32_t cmd, uint32_t size, const char *lc)> func) { + const char* p = lcRange.begin(); + for (unsigned i=0; i < lcCount; ++i) { + const load_command *lc = reinterpret_cast<const load_command*>(p); + load_command lcCopy; + const load_command *slc = lc; + if (isBig != llvm::sys::IsBigEndianHost) { + memcpy(&lcCopy, lc, sizeof(load_command)); + swapStruct(lcCopy); + slc = &lcCopy; + } + if ( (p + slc->cmdsize) > lcRange.end() ) + return llvm::make_error<GenericError>("Load command exceeds range"); + + if (func(slc->cmd, slc->cmdsize, p)) + return llvm::Error::success(); + + p += slc->cmdsize; + } + + return llvm::Error::success(); +} + +static std::error_code appendRelocations(Relocations &relocs, StringRef buffer, + bool bigEndian, + uint32_t reloff, uint32_t nreloc) { + if ((reloff + nreloc*8) > buffer.size()) + return make_error_code(llvm::errc::executable_format_error); + const any_relocation_info* relocsArray = + reinterpret_cast<const any_relocation_info*>(buffer.begin()+reloff); + + for(uint32_t i=0; i < nreloc; ++i) { + relocs.push_back(unpackRelocation(relocsArray[i], bigEndian)); + } + return std::error_code(); +} + +static std::error_code +appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool isBig, + uint32_t istOffset, uint32_t istCount, + uint32_t startIndex, uint32_t count) { + if ((istOffset + istCount*4) > buffer.size()) + return make_error_code(llvm::errc::executable_format_error); + if (startIndex+count > istCount) + return make_error_code(llvm::errc::executable_format_error); + const uint8_t *indirectSymbolArray = (const uint8_t *)buffer.data(); + + for(uint32_t i=0; i < count; ++i) { + isyms.push_back(read32( + indirectSymbolArray + (startIndex + i) * sizeof(uint32_t), isBig)); + } + return std::error_code(); +} + + +template <typename T> static T readBigEndian(T t) { + if (llvm::sys::IsLittleEndianHost) + 
llvm::sys::swapByteOrder(t); + return t; +} + + +static bool isMachOHeader(const mach_header *mh, bool &is64, bool &isBig) { + switch (read32(&mh->magic, false)) { + case llvm::MachO::MH_MAGIC: + is64 = false; + isBig = false; + return true; + case llvm::MachO::MH_MAGIC_64: + is64 = true; + isBig = false; + return true; + case llvm::MachO::MH_CIGAM: + is64 = false; + isBig = true; + return true; + case llvm::MachO::MH_CIGAM_64: + is64 = true; + isBig = true; + return true; + default: + return false; + } +} + + +bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) { + // Try opening and mapping file at path. + ErrorOr<std::unique_ptr<MemoryBuffer>> b = MemoryBuffer::getFileOrSTDIN(path); + if (b.getError()) + return false; + + // If file length < 32 it is too small to be mach-o object file. + StringRef fileBuffer = b->get()->getBuffer(); + if (fileBuffer.size() < 32) + return false; + + // If file buffer does not start with MH_MAGIC (and variants), not obj file. + const mach_header *mh = reinterpret_cast<const mach_header *>( + fileBuffer.begin()); + bool is64, isBig; + if (!isMachOHeader(mh, is64, isBig)) + return false; + + // If not MH_OBJECT, not object file. + if (read32(&mh->filetype, isBig) != MH_OBJECT) + return false; + + // Lookup up arch from cpu/subtype pair. 
+ arch = MachOLinkingContext::archFromCpuType( + read32(&mh->cputype, isBig), + read32(&mh->cpusubtype, isBig)); + return true; +} + +bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch, + uint32_t &offset, uint32_t &size) { + const char *start = mb.getBufferStart(); + const llvm::MachO::fat_header *fh = + reinterpret_cast<const llvm::MachO::fat_header *>(start); + if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC) + return false; + uint32_t nfat_arch = readBigEndian(fh->nfat_arch); + const fat_arch *fstart = + reinterpret_cast<const fat_arch *>(start + sizeof(fat_header)); + const fat_arch *fend = + reinterpret_cast<const fat_arch *>(start + sizeof(fat_header) + + sizeof(fat_arch) * nfat_arch); + const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch); + const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch); + for (const fat_arch *fa = fstart; fa < fend; ++fa) { + if ((readBigEndian(fa->cputype) == reqCpuType) && + (readBigEndian(fa->cpusubtype) == reqCpuSubtype)) { + offset = readBigEndian(fa->offset); + size = readBigEndian(fa->size); + if ((offset + size) > mb.getBufferSize()) + return false; + return true; + } + } + return false; +} + +/// Reads a mach-o file and produces an in-memory normalized view. +llvm::Expected<std::unique_ptr<NormalizedFile>> +readBinary(std::unique_ptr<MemoryBuffer> &mb, + const MachOLinkingContext::Arch arch) { + // Make empty NormalizedFile. + std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + + const char *start = mb->getBufferStart(); + size_t objSize = mb->getBufferSize(); + const mach_header *mh = reinterpret_cast<const mach_header *>(start); + + uint32_t sliceOffset; + uint32_t sliceSize; + if (sliceFromFatFile(mb->getMemBufferRef(), arch, sliceOffset, sliceSize)) { + start = &start[sliceOffset]; + objSize = sliceSize; + mh = reinterpret_cast<const mach_header *>(start); + } + + // Determine endianness and pointer size for mach-o file. 
+ bool is64, isBig; + if (!isMachOHeader(mh, is64, isBig)) + return llvm::make_error<GenericError>("File is not a mach-o"); + + // Endian swap header, if needed. + mach_header headerCopy; + const mach_header *smh = mh; + if (isBig != llvm::sys::IsBigEndianHost) { + memcpy(&headerCopy, mh, sizeof(mach_header)); + swapStruct(headerCopy); + smh = &headerCopy; + } + + // Validate head and load commands fit in buffer. + const uint32_t lcCount = smh->ncmds; + const char *lcStart = + start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)); + StringRef lcRange(lcStart, smh->sizeofcmds); + if (lcRange.end() > (start + objSize)) + return llvm::make_error<GenericError>("Load commands exceed file size"); + + // Get architecture from mach_header. + f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype); + if (f->arch != arch) { + return llvm::make_error<GenericError>( + Twine("file is wrong architecture. Expected " + "(" + MachOLinkingContext::nameFromArch(arch) + + ") found (" + + MachOLinkingContext::nameFromArch(f->arch) + + ")" )); + } + // Copy file type and flags + f->fileType = HeaderFileType(smh->filetype); + f->flags = smh->flags; + + + // Pre-scan load commands looking for indirect symbol table. + uint32_t indirectSymbolTableOffset = 0; + uint32_t indirectSymbolTableCount = 0; + auto ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, + [&](uint32_t cmd, uint32_t size, + const char *lc) -> bool { + if (cmd == LC_DYSYMTAB) { + const dysymtab_command *d = reinterpret_cast<const dysymtab_command*>(lc); + indirectSymbolTableOffset = read32(&d->indirectsymoff, isBig); + indirectSymbolTableCount = read32(&d->nindirectsyms, isBig); + return true; + } + return false; + }); + if (ec) + return std::move(ec); + + // Walk load commands looking for segments/sections and the symbol table. 
+ const data_in_code_entry *dataInCode = nullptr; + const dyld_info_command *dyldInfo = nullptr; + uint32_t dataInCodeSize = 0; + ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, + [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool { + switch(cmd) { + case LC_SEGMENT_64: + if (is64) { + const segment_command_64 *seg = + reinterpret_cast<const segment_command_64*>(lc); + const unsigned sectionCount = read32(&seg->nsects, isBig); + const section_64 *sects = reinterpret_cast<const section_64*> + (lc + sizeof(segment_command_64)); + const unsigned lcSize = sizeof(segment_command_64) + + sectionCount*sizeof(section_64); + // Verify sections don't extend beyond end of segment load command. + if (lcSize > size) + return true; + for (unsigned i=0; i < sectionCount; ++i) { + const section_64 *sect = &sects[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(&sect->flags, isBig) & + SECTION_TYPE); + section.attributes = read32(&sect->flags, isBig) & SECTION_ATTRIBUTES; + section.alignment = 1 << read32(&sect->align, isBig); + section.address = read64(&sect->addr, isBig); + const uint8_t *content = + (const uint8_t *)start + read32(&sect->offset, isBig); + size_t contentSize = read64(&sect->size, isBig); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. 
+ section.content = llvm::makeArrayRef(content, contentSize); + appendRelocations(section.relocations, mb->getBuffer(), isBig, + read32(&sect->reloff, isBig), + read32(&sect->nreloc, isBig)); + if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { + appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(), + isBig, + indirectSymbolTableOffset, + indirectSymbolTableCount, + read32(&sect->reserved1, isBig), + contentSize/4); + } + f->sections.push_back(section); + } + } + break; + case LC_SEGMENT: + if (!is64) { + const segment_command *seg = + reinterpret_cast<const segment_command*>(lc); + const unsigned sectionCount = read32(&seg->nsects, isBig); + const section *sects = reinterpret_cast<const section*> + (lc + sizeof(segment_command)); + const unsigned lcSize = sizeof(segment_command) + + sectionCount*sizeof(section); + // Verify sections don't extend beyond end of segment load command. + if (lcSize > size) + return true; + for (unsigned i=0; i < sectionCount; ++i) { + const section *sect = &sects[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(&sect->flags, isBig) & + SECTION_TYPE); + section.attributes = + read32((const uint8_t *)&sect->flags, isBig) & SECTION_ATTRIBUTES; + section.alignment = 1 << read32(&sect->align, isBig); + section.address = read32(&sect->addr, isBig); + const uint8_t *content = + (const uint8_t *)start + read32(&sect->offset, isBig); + size_t contentSize = read32(&sect->size, isBig); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. 
+ section.content = llvm::makeArrayRef(content, contentSize); + appendRelocations(section.relocations, mb->getBuffer(), isBig, + read32(&sect->reloff, isBig), + read32(&sect->nreloc, isBig)); + if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { + appendIndirectSymbols( + section.indirectSymbols, mb->getBuffer(), isBig, + indirectSymbolTableOffset, indirectSymbolTableCount, + read32(&sect->reserved1, isBig), contentSize / 4); + } + f->sections.push_back(section); + } + } + break; + case LC_SYMTAB: { + const symtab_command *st = reinterpret_cast<const symtab_command*>(lc); + const char *strings = start + read32(&st->stroff, isBig); + const uint32_t strSize = read32(&st->strsize, isBig); + // Validate string pool and symbol table all in buffer. + if (read32((const uint8_t *)&st->stroff, isBig) + + read32((const uint8_t *)&st->strsize, isBig) > + objSize) + return true; + if (is64) { + const uint32_t symOffset = read32(&st->symoff, isBig); + const uint32_t symCount = read32(&st->nsyms, isBig); + if ( symOffset+(symCount*sizeof(nlist_64)) > objSize) + return true; + const nlist_64 *symbols = + reinterpret_cast<const nlist_64 *>(start + symOffset); + // Convert each nlist_64 to a lld::mach_o::normalized::Symbol. 
+ for(uint32_t i=0; i < symCount; ++i) { + nlist_64 tempSym; + memcpy(&tempSym, &symbols[i], sizeof(nlist_64)); + const nlist_64 *sin = &tempSym; + if (isBig != llvm::sys::IsBigEndianHost) + swapStruct(tempSym); + Symbol sout; + if (sin->n_strx > strSize) + return true; + sout.name = &strings[sin->n_strx]; + sout.type = static_cast<NListType>(sin->n_type & (N_STAB|N_TYPE)); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sin->n_type & N_STAB) + f->stabsSymbols.push_back(sout); + else if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sin->n_type & N_EXT) + f->globalSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } else { + const uint32_t symOffset = read32(&st->symoff, isBig); + const uint32_t symCount = read32(&st->nsyms, isBig); + if ( symOffset+(symCount*sizeof(nlist)) > objSize) + return true; + const nlist *symbols = + reinterpret_cast<const nlist *>(start + symOffset); + // Convert each nlist to a lld::mach_o::normalized::Symbol. 
+ for(uint32_t i=0; i < symCount; ++i) { + const nlist *sin = &symbols[i]; + nlist tempSym; + if (isBig != llvm::sys::IsBigEndianHost) { + tempSym = *sin; swapStruct(tempSym); sin = &tempSym; + } + Symbol sout; + if (sin->n_strx > strSize) + return true; + sout.name = &strings[sin->n_strx]; + sout.type = (NListType)(sin->n_type & N_TYPE); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sout.scope == (SymbolScope)N_EXT) + f->globalSymbols.push_back(sout); + else if (sin->n_type & N_STAB) + f->stabsSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } + } + break; + case LC_ID_DYLIB: { + const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc); + f->installName = lc + read32(&dl->dylib.name, isBig); + f->currentVersion = read32(&dl->dylib.current_version, isBig); + f->compatVersion = read32(&dl->dylib.compatibility_version, isBig); + } + break; + case LC_DATA_IN_CODE: { + const linkedit_data_command *ldc = + reinterpret_cast<const linkedit_data_command*>(lc); + dataInCode = reinterpret_cast<const data_in_code_entry *>( + start + read32(&ldc->dataoff, isBig)); + dataInCodeSize = read32(&ldc->datasize, isBig); + } + break; + case LC_LOAD_DYLIB: + case LC_LOAD_WEAK_DYLIB: + case LC_REEXPORT_DYLIB: + case LC_LOAD_UPWARD_DYLIB: { + const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc); + DependentDylib entry; + entry.path = lc + read32(&dl->dylib.name, isBig); + entry.kind = LoadCommandType(cmd); + entry.compatVersion = read32(&dl->dylib.compatibility_version, isBig); + entry.currentVersion = read32(&dl->dylib.current_version, isBig); + f->dependentDylibs.push_back(entry); + } + break; + case LC_RPATH: { + const rpath_command *rpc = reinterpret_cast<const rpath_command *>(lc); + f->rpaths.push_back(lc + read32(&rpc->path, isBig)); + } + break; + case LC_DYLD_INFO: + 
case LC_DYLD_INFO_ONLY: + dyldInfo = reinterpret_cast<const dyld_info_command*>(lc); + break; + case LC_VERSION_MIN_MACOSX: + case LC_VERSION_MIN_IPHONEOS: + case LC_VERSION_MIN_WATCHOS: + case LC_VERSION_MIN_TVOS: + // If we are emitting an object file, then we may take the load command + // kind from these commands and pass it on to the output + // file. + f->minOSVersionKind = (LoadCommandType)cmd; + break; + } + return false; + }); + if (ec) + return std::move(ec); + + if (dataInCode) { + // Convert on-disk data_in_code_entry array to DataInCode vector. + for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) { + DataInCode entry; + entry.offset = read32(&dataInCode[i].offset, isBig); + entry.length = read16(&dataInCode[i].length, isBig); + entry.kind = + (DataRegionType)read16((const uint8_t *)&dataInCode[i].kind, isBig); + f->dataInCode.push_back(entry); + } + } + + if (dyldInfo) { + // If any exports, extract and add to normalized exportInfo vector. + if (dyldInfo->export_size) { + const uint8_t *trieStart = reinterpret_cast<const uint8_t*>(start + + dyldInfo->export_off); + ArrayRef<uint8_t> trie(trieStart, dyldInfo->export_size); + for (const ExportEntry &trieExport : MachOObjectFile::exports(trie)) { + Export normExport; + normExport.name = trieExport.name().copy(f->ownedAllocations); + normExport.offset = trieExport.address(); + normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK); + normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK; + normExport.otherOffset = trieExport.other(); + if (!trieExport.otherName().empty()) + normExport.otherName = trieExport.otherName().copy(f->ownedAllocations); + f->exportInfo.push_back(normExport); + } + } + } + + return std::move(f); +} + +class MachOObjectReader : public Reader { +public: + MachOObjectReader(MachOLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + return (magic == 
llvm::sys::fs::file_magic::macho_object && + mb.getBufferSize() > 32); + } + + ErrorOr<std::unique_ptr<File>> + loadFile(std::unique_ptr<MemoryBuffer> mb, + const Registry ®istry) const override { + std::unique_ptr<File> ret = + llvm::make_unique<MachOFile>(std::move(mb), &_ctx); + return std::move(ret); + } + +private: + MachOLinkingContext &_ctx; +}; + +class MachODylibReader : public Reader { +public: + MachODylibReader(MachOLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + switch (magic) { + case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + return mb.getBufferSize() > 32; + default: + return false; + } + } + + ErrorOr<std::unique_ptr<File>> + loadFile(std::unique_ptr<MemoryBuffer> mb, + const Registry ®istry) const override { + std::unique_ptr<File> ret = + llvm::make_unique<MachODylibFile>(std::move(mb), &_ctx); + return std::move(ret); + } + +private: + MachOLinkingContext &_ctx; +}; + +} // namespace normalized +} // namespace mach_o + +void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) { + MachOLinkingContext::Arch arch = ctx.arch(); + add(std::unique_ptr<Reader>(new mach_o::normalized::MachOObjectReader(ctx))); + add(std::unique_ptr<Reader>(new mach_o::normalized::MachODylibReader(ctx))); + addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(), + ctx.archHandler().kindStrings()); + add(std::unique_ptr<YamlIOTaggedDocumentHandler>( + new mach_o::MachOYamlIOTaggedDocumentHandler(arch))); +} + + +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h new file mode 100644 index 000000000000..d69c5389e9d6 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h @@ -0,0 +1,215 @@ +//===- 
lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H +#define LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H + +#include "MachONormalizedFile.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MachO.h" +#include <system_error> + +namespace lld { +namespace mach_o { +namespace normalized { + +class ByteBuffer { +public: + ByteBuffer() : _ostream(_bytes) { } + + void append_byte(uint8_t b) { + _ostream << b; + } + void append_uleb128(uint64_t value) { + llvm::encodeULEB128(value, _ostream); + } + void append_uleb128Fixed(uint64_t value, unsigned byteCount) { + unsigned min = llvm::getULEB128Size(value); + assert(min <= byteCount); + unsigned pad = byteCount - min; + llvm::encodeULEB128(value, _ostream, pad); + } + void append_sleb128(int64_t value) { + llvm::encodeSLEB128(value, _ostream); + } + void append_string(StringRef str) { + _ostream << str; + append_byte(0); + } + void align(unsigned alignment) { + while ( (_ostream.tell() % alignment) != 0 ) + append_byte(0); + } + size_t size() { + return _ostream.tell(); + } + const uint8_t *bytes() { + return reinterpret_cast<const uint8_t*>(_ostream.str().data()); + } + +private: + SmallVector<char, 128> _bytes; + // Stream ivar must be after SmallVector ivar to construct properly. 
+ llvm::raw_svector_ostream _ostream; +}; + +using namespace llvm::support::endian; +using llvm::sys::getSwappedBytes; + +template<typename T> +static inline uint16_t read16(const T *loc, bool isBig) { + assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); + return isBig ? read16be(loc) : read16le(loc); +} + +template<typename T> +static inline uint32_t read32(const T *loc, bool isBig) { + assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); + return isBig ? read32be(loc) : read32le(loc); +} + +template<typename T> +static inline uint64_t read64(const T *loc, bool isBig) { + assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); + return isBig ? read64be(loc) : read64le(loc); +} + +inline void write16(uint8_t *loc, uint16_t value, bool isBig) { + if (isBig) + write16be(loc, value); + else + write16le(loc, value); +} + +inline void write32(uint8_t *loc, uint32_t value, bool isBig) { + if (isBig) + write32be(loc, value); + else + write32le(loc, value); +} + +inline void write64(uint8_t *loc, uint64_t value, bool isBig) { + if (isBig) + write64be(loc, value); + else + write64le(loc, value); +} + +inline uint32_t +bitFieldExtract(uint32_t value, bool isBigEndianBigField, uint8_t firstBit, + uint8_t bitCount) { + const uint32_t mask = ((1<<bitCount)-1); + const uint8_t shift = isBigEndianBigField ? (32-firstBit-bitCount) : firstBit; + return (value >> shift) & mask; +} + +inline void +bitFieldSet(uint32_t &bits, bool isBigEndianBigField, uint32_t newBits, + uint8_t firstBit, uint8_t bitCount) { + const uint32_t mask = ((1<<bitCount)-1); + assert((newBits & mask) == newBits); + const uint8_t shift = isBigEndianBigField ? 
(32-firstBit-bitCount) : firstBit; + bits &= ~(mask << shift); + bits |= (newBits << shift); +} + +inline Relocation unpackRelocation(const llvm::MachO::any_relocation_info &r, + bool isBigEndian) { + uint32_t r0 = read32(&r.r_word0, isBigEndian); + uint32_t r1 = read32(&r.r_word1, isBigEndian); + + Relocation result; + if (r0 & llvm::MachO::R_SCATTERED) { + // scattered relocation record always laid out like big endian bit field + result.offset = bitFieldExtract(r0, true, 8, 24); + result.scattered = true; + result.type = (RelocationInfoType) + bitFieldExtract(r0, true, 4, 4); + result.length = bitFieldExtract(r0, true, 2, 2); + result.pcRel = bitFieldExtract(r0, true, 1, 1); + result.isExtern = false; + result.value = r1; + result.symbol = 0; + } else { + result.offset = r0; + result.scattered = false; + result.type = (RelocationInfoType) + bitFieldExtract(r1, isBigEndian, 28, 4); + result.length = bitFieldExtract(r1, isBigEndian, 25, 2); + result.pcRel = bitFieldExtract(r1, isBigEndian, 24, 1); + result.isExtern = bitFieldExtract(r1, isBigEndian, 27, 1); + result.value = 0; + result.symbol = bitFieldExtract(r1, isBigEndian, 0, 24); + } + return result; +} + + +inline llvm::MachO::any_relocation_info +packRelocation(const Relocation &r, bool swap, bool isBigEndian) { + uint32_t r0 = 0; + uint32_t r1 = 0; + + if (r.scattered) { + r1 = r.value; + bitFieldSet(r0, true, r.offset, 8, 24); + bitFieldSet(r0, true, r.type, 4, 4); + bitFieldSet(r0, true, r.length, 2, 2); + bitFieldSet(r0, true, r.pcRel, 1, 1); + bitFieldSet(r0, true, r.scattered, 0, 1); // R_SCATTERED + } else { + r0 = r.offset; + bitFieldSet(r1, isBigEndian, r.type, 28, 4); + bitFieldSet(r1, isBigEndian, r.isExtern, 27, 1); + bitFieldSet(r1, isBigEndian, r.length, 25, 2); + bitFieldSet(r1, isBigEndian, r.pcRel, 24, 1); + bitFieldSet(r1, isBigEndian, r.symbol, 0, 24); + } + + llvm::MachO::any_relocation_info result; + result.r_word0 = swap ? getSwappedBytes(r0) : r0; + result.r_word1 = swap ? 
getSwappedBytes(r1) : r1; + return result; +} + +inline StringRef getString16(const char s[16]) { + StringRef x = s; + if ( x.size() > 16 ) + return x.substr(0, 16); + else + return x; +} + +inline void setString16(StringRef str, char s[16]) { + memset(s, 0, 16); + memcpy(s, str.begin(), (str.size() > 16) ? 16: str.size()); +} + +// Implemented in normalizedToAtoms() and used by normalizedFromAtoms() so +// that the same table can be used to map mach-o sections to and from +// DefinedAtom::ContentType. +void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, + StringRef &segmentName, + StringRef §ionName, + SectionType §ionType, + SectionAttr §ionAttrs, + bool &relocsToDefinedCanBeImplicit); + +} // namespace normalized +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp new file mode 100644 index 000000000000..e853faf9112e --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp @@ -0,0 +1,1551 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation converts normalized +/// mach-o in memory to mach-o binary on disk. 
+/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// ^ +/// | +/// | +/// +------------+ +/// | normalized | +/// +------------+ + +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <functional> +#include <list> +#include <map> +#include <system_error> + +using namespace llvm::MachO; + +namespace lld { +namespace mach_o { +namespace normalized { + +struct TrieNode; // Forward declaration. 
+ +struct TrieEdge : public llvm::ilist_node<TrieEdge> { + TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {} + + StringRef _subString; + struct TrieNode *_child; +}; + +} // namespace normalized +} // namespace mach_o +} // namespace lld + + +namespace llvm { +using lld::mach_o::normalized::TrieEdge; +template <> +struct ilist_alloc_traits<TrieEdge> : ilist_noalloc_traits<TrieEdge> {}; +} // namespace llvm + + +namespace lld { +namespace mach_o { +namespace normalized { + +struct TrieNode { + typedef llvm::ilist<TrieEdge> TrieEdgeList; + + TrieNode(StringRef s) + : _cummulativeString(s), _address(0), _flags(0), _other(0), + _trieOffset(0), _hasExportInfo(false) {} + ~TrieNode() = default; + + void addSymbol(const Export &entry, BumpPtrAllocator &allocator, + std::vector<TrieNode *> &allNodes); + + void addOrderedNodes(const Export &entry, + std::vector<TrieNode *> &allNodes); + bool updateOffset(uint32_t &offset); + void appendToByteBuffer(ByteBuffer &out); + +private: + StringRef _cummulativeString; + TrieEdgeList _children; + uint64_t _address; + uint64_t _flags; + uint64_t _other; + StringRef _importedName; + uint32_t _trieOffset; + bool _hasExportInfo; + bool _ordered = false; +}; + +/// Utility class for writing a mach-o binary file given an in-memory +/// normalized file. +class MachOFileLayout { +public: + /// All layout computation is done in the constructor. + MachOFileLayout(const NormalizedFile &file); + + /// Returns the final file size as computed in the constructor. + size_t size() const; + + // Returns size of the mach_header and load commands. + size_t headerAndLoadCommandsSize() const; + + /// Writes the normalized file as a binary mach-o file to the specified + /// path. This does not have a stream interface because the generated + /// file may need the 'x' bit set. 
+ llvm::Error writeBinary(StringRef path); + +private: + uint32_t loadCommandsSize(uint32_t &count); + void buildFileOffsets(); + void writeMachHeader(); + llvm::Error writeLoadCommands(); + void writeSectionContent(); + void writeRelocations(); + void writeSymbolTable(); + void writeRebaseInfo(); + void writeBindingInfo(); + void writeLazyBindingInfo(); + void writeExportInfo(); + void writeFunctionStartsInfo(); + void writeDataInCodeInfo(); + void writeLinkEditContent(); + void buildLinkEditInfo(); + void buildRebaseInfo(); + void buildBindInfo(); + void buildLazyBindInfo(); + void buildExportTrie(); + void computeFunctionStartsSize(); + void computeDataInCodeSize(); + void computeSymbolTableSizes(); + void buildSectionRelocations(); + void appendSymbols(const std::vector<Symbol> &symbols, + uint32_t &symOffset, uint32_t &strOffset); + uint32_t indirectSymbolIndex(const Section §, uint32_t &index); + uint32_t indirectSymbolElementSize(const Section §); + + // For use as template parameter to load command methods. + struct MachO64Trait { + typedef llvm::MachO::segment_command_64 command; + typedef llvm::MachO::section_64 section; + enum { LC = llvm::MachO::LC_SEGMENT_64 }; + }; + + // For use as template parameter to load command methods. 
+ struct MachO32Trait { + typedef llvm::MachO::segment_command command; + typedef llvm::MachO::section section; + enum { LC = llvm::MachO::LC_SEGMENT }; + }; + + template <typename T> + llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc); + template <typename T> llvm::Error writeSegmentLoadCommands(uint8_t *&lc); + + uint32_t pointerAlign(uint32_t value); + static StringRef dyldPath(); + + struct SegExtraInfo { + uint32_t fileOffset; + uint32_t fileSize; + std::vector<const Section*> sections; + }; + typedef std::map<const Segment*, SegExtraInfo> SegMap; + struct SectionExtraInfo { + uint32_t fileOffset; + }; + typedef std::map<const Section*, SectionExtraInfo> SectionMap; + + const NormalizedFile &_file; + std::error_code _ec; + uint8_t *_buffer; + const bool _is64; + const bool _swap; + const bool _bigEndianArch; + uint64_t _seg1addr; + uint32_t _startOfLoadCommands; + uint32_t _countOfLoadCommands; + uint32_t _endOfLoadCommands; + uint32_t _startOfRelocations; + uint32_t _startOfFunctionStarts; + uint32_t _startOfDataInCode; + uint32_t _startOfSymbols; + uint32_t _startOfIndirectSymbols; + uint32_t _startOfSymbolStrings; + uint32_t _endOfSymbolStrings; + uint32_t _symbolTableLocalsStartIndex; + uint32_t _symbolTableGlobalsStartIndex; + uint32_t _symbolTableUndefinesStartIndex; + uint32_t _symbolStringPoolSize; + uint32_t _symbolTableSize; + uint32_t _functionStartsSize; + uint32_t _dataInCodeSize; + uint32_t _indirectSymbolTableCount; + // Used in object file creation only + uint32_t _startOfSectionsContent; + uint32_t _endOfSectionsContent; + // Used in final linked image only + uint32_t _startOfLinkEdit; + uint32_t _startOfRebaseInfo; + uint32_t _endOfRebaseInfo; + uint32_t _startOfBindingInfo; + uint32_t _endOfBindingInfo; + uint32_t _startOfLazyBindingInfo; + uint32_t _endOfLazyBindingInfo; + uint32_t _startOfExportTrie; + uint32_t _endOfExportTrie; + uint32_t _endOfLinkEdit; + uint64_t _addressOfLinkEdit; + SegMap _segInfo; + SectionMap _sectInfo; + 
ByteBuffer _rebaseInfo; + ByteBuffer _bindingInfo; + ByteBuffer _lazyBindingInfo; + ByteBuffer _weakBindingInfo; + ByteBuffer _exportTrie; +}; + +size_t headerAndLoadCommandsSize(const NormalizedFile &file) { + MachOFileLayout layout(file); + return layout.headerAndLoadCommandsSize(); +} + +StringRef MachOFileLayout::dyldPath() { + return "/usr/lib/dyld"; +} + +uint32_t MachOFileLayout::pointerAlign(uint32_t value) { + return llvm::alignTo(value, _is64 ? 8 : 4); +} + + +size_t MachOFileLayout::headerAndLoadCommandsSize() const { + return _endOfLoadCommands; +} + +MachOFileLayout::MachOFileLayout(const NormalizedFile &file) + : _file(file), + _is64(MachOLinkingContext::is64Bit(file.arch)), + _swap(!MachOLinkingContext::isHostEndian(file.arch)), + _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)), + _seg1addr(INT64_MAX) { + _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); + const size_t segCommandBaseSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section)); + if (file.fileType == llvm::MachO::MH_OBJECT) { + // object files have just one segment load command containing all sections + _endOfLoadCommands = _startOfLoadCommands + + segCommandBaseSize + + file.sections.size() * sectsSize + + sizeof(symtab_command); + _countOfLoadCommands = 2; + if (file.hasMinVersionLoadCommand) { + _endOfLoadCommands += sizeof(version_min_command); + _countOfLoadCommands++; + } + if (!_file.functionStarts.empty()) { + _endOfLoadCommands += sizeof(linkedit_data_command); + _countOfLoadCommands++; + } + if (_file.generateDataInCodeLoadCommand) { + _endOfLoadCommands += sizeof(linkedit_data_command); + _countOfLoadCommands++; + } + // Assign file offsets to each section. 
+ _startOfSectionsContent = _endOfLoadCommands; + unsigned relocCount = 0; + uint64_t offset = _startOfSectionsContent; + for (const Section § : file.sections) { + if (isZeroFillSection(sect.type)) + _sectInfo[§].fileOffset = 0; + else { + offset = llvm::alignTo(offset, sect.alignment); + _sectInfo[§].fileOffset = offset; + offset += sect.content.size(); + } + relocCount += sect.relocations.size(); + } + _endOfSectionsContent = offset; + + computeSymbolTableSizes(); + computeFunctionStartsSize(); + computeDataInCodeSize(); + + // Align start of relocations. + _startOfRelocations = pointerAlign(_endOfSectionsContent); + _startOfFunctionStarts = _startOfRelocations + relocCount * 8; + _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; + // Add Indirect symbol table. + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + // Align start of symbol table and symbol strings. + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfRelocations=" << _startOfRelocations << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " startOfSectionsContent=" << _startOfSectionsContent << "\n" + << " endOfSectionsContent=" << _endOfSectionsContent << "\n"); + } else { + // Final linked images have one load command per segment. 
+ _endOfLoadCommands = _startOfLoadCommands + + loadCommandsSize(_countOfLoadCommands); + + // Assign section file offsets. + buildFileOffsets(); + buildLinkEditInfo(); + + // LINKEDIT of final linked images has in order: + // rebase info, binding info, lazy binding info, weak binding info, + // data-in-code, symbol table, indirect symbol table, symbol table strings. + _startOfRebaseInfo = _startOfLinkEdit; + _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); + _startOfBindingInfo = _endOfRebaseInfo; + _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); + _startOfLazyBindingInfo = _endOfBindingInfo; + _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); + _startOfExportTrie = _endOfLazyBindingInfo; + _endOfExportTrie = _startOfExportTrie + _exportTrie.size(); + _startOfFunctionStarts = _endOfExportTrie; + _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfLinkEdit=" << _startOfLinkEdit << "\n" + << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n" + << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n" + << " startOfBindingInfo=" << _startOfBindingInfo << "\n" + << " endOfBindingInfo=" << _endOfBindingInfo << "\n" + << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" + << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" + << " startOfExportTrie=" << 
_startOfExportTrie << "\n" + << " endOfExportTrie=" << _endOfExportTrie << "\n" + << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n" + << " startOfDataInCode=" << _startOfDataInCode << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n"); + } +} + +uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count) { + uint32_t size = 0; + count = 0; + + const size_t segCommandSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section)); + + // Add LC_SEGMENT for each segment. + size += _file.segments.size() * segCommandSize; + count += _file.segments.size(); + // Add section record for each section. + size += _file.sections.size() * sectionSize; + + // If creating a dylib, add LC_ID_DYLIB. + if (_file.fileType == llvm::MachO::MH_DYLIB) { + size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1); + ++count; + } + + // Add LC_DYLD_INFO + size += sizeof(dyld_info_command); + ++count; + + // Add LC_SYMTAB + size += sizeof(symtab_command); + ++count; + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + size += sizeof(dysymtab_command); + ++count; + } + + // If main executable add LC_LOAD_DYLINKER + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1); + ++count; + } + + // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, + // LC_VERSION_MIN_TVOS + if (_file.hasMinVersionLoadCommand) { + size += sizeof(version_min_command); + ++count; + } + + // Add LC_SOURCE_VERSION + size += sizeof(source_version_command); + ++count; + + // If main executable add LC_MAIN + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + size += sizeof(entry_point_command); + ++count; + } + + // 
Add LC_LOAD_DYLIB for each dependent dylib. + for (const DependentDylib &dep : _file.dependentDylibs) { + size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + ++count; + } + + // Add LC_RPATH + for (const StringRef &path : _file.rpaths) { + size += pointerAlign(sizeof(rpath_command) + path.size() + 1); + ++count; + } + + // Add LC_FUNCTION_STARTS if needed + if (!_file.functionStarts.empty()) { + size += sizeof(linkedit_data_command); + ++count; + } + + // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries. + // FIXME: Zero length entries is only to match ld64. Should we change this? + if (_file.generateDataInCodeLoadCommand) { + size += sizeof(linkedit_data_command); + ++count; + } + + return size; +} + +static bool overlaps(const Segment &s1, const Segment &s2) { + if (s2.address >= s1.address+s1.size) + return false; + if (s1.address >= s2.address+s2.size) + return false; + return true; +} + +static bool overlaps(const Section &s1, const Section &s2) { + if (s2.address >= s1.address+s1.content.size()) + return false; + if (s1.address >= s2.address+s2.content.size()) + return false; + return true; +} + +void MachOFileLayout::buildFileOffsets() { + // Verify no segments overlap + for (const Segment &sg1 : _file.segments) { + for (const Segment &sg2 : _file.segments) { + if (&sg1 == &sg2) + continue; + if (overlaps(sg1,sg2)) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Verify no sections overlap + for (const Section &s1 : _file.sections) { + for (const Section &s2 : _file.sections) { + if (&s1 == &s2) + continue; + if (overlaps(s1,s2)) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Build side table of extra info about segments and sections. + SegExtraInfo t; + t.fileOffset = 0; + for (const Segment &sg : _file.segments) { + _segInfo[&sg] = t; + } + SectionExtraInfo t2; + t2.fileOffset = 0; + // Assign sections to segments. 
+ for (const Section &s : _file.sections) { + _sectInfo[&s] = t2; + bool foundSegment = false; + for (const Segment &sg : _file.segments) { + if (sg.name.equals(s.segmentName)) { + if ((s.address >= sg.address) + && (s.address+s.content.size() <= sg.address+sg.size)) { + _segInfo[&sg].sections.push_back(&s); + foundSegment = true; + break; + } + } + } + if (!foundSegment) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + + // Assign file offsets. + uint32_t fileOffset = 0; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "buildFileOffsets()\n"); + for (const Segment &sg : _file.segments) { + _segInfo[&sg].fileOffset = fileOffset; + if ((_seg1addr == INT64_MAX) && sg.init_access) + _seg1addr = sg.address; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " segment=" << sg.name + << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n"); + + uint32_t segFileSize = 0; + // A segment that is not zero-fill must use a least one page of disk space. + if (sg.init_access) + segFileSize = _file.pageSize; + for (const Section *s : _segInfo[&sg].sections) { + uint32_t sectOffset = s->address - sg.address; + uint32_t sectFileSize = + isZeroFillSection(s->type) ? 
0 : s->content.size(); + segFileSize = std::max(segFileSize, sectOffset + sectFileSize); + + _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " section=" << s->sectionName + << ", fileOffset=" << fileOffset << "\n"); + } + + // round up all segments to page aligned, except __LINKEDIT + if (!sg.name.equals("__LINKEDIT")) { + _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize); + fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize); + } + _addressOfLinkEdit = sg.address + sg.size; + } + _startOfLinkEdit = fileOffset; +} + +size_t MachOFileLayout::size() const { + return _endOfSymbolStrings; +} + +void MachOFileLayout::writeMachHeader() { + auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch); + // dynamic x86 executables on newer OS version should also set the + // CPU_SUBTYPE_LIB64 mask in the CPU subtype. + // FIXME: Check that this is a dynamic executable, not a static one. + if (_file.fileType == llvm::MachO::MH_EXECUTE && + cpusubtype == CPU_SUBTYPE_X86_64_ALL && + _file.os == MachOLinkingContext::OS::macOSX) { + uint32_t version; + bool failed = MachOLinkingContext::parsePackedVersion("10.5", version); + if (!failed && _file.minOSverson >= version) + cpusubtype |= CPU_SUBTYPE_LIB64; + } + + mach_header *mh = reinterpret_cast<mach_header*>(_buffer); + mh->magic = _is64 ? 
llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC; + mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch); + mh->cpusubtype = cpusubtype; + mh->filetype = _file.fileType; + mh->ncmds = _countOfLoadCommands; + mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands; + mh->flags = _file.flags; + if (_swap) + swapStruct(*mh); +} + +uint32_t MachOFileLayout::indirectSymbolIndex(const Section §, + uint32_t &index) { + if (sect.indirectSymbols.empty()) + return 0; + uint32_t result = index; + index += sect.indirectSymbols.size(); + return result; +} + +uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) { + if (sect.indirectSymbols.empty()) + return 0; + if (sect.type != S_SYMBOL_STUBS) + return 0; + return sect.content.size() / sect.indirectSymbols.size(); +} + +template <typename T> +llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { + typename T::command* seg = reinterpret_cast<typename T::command*>(lc); + seg->cmd = T::LC; + seg->cmdsize = sizeof(typename T::command) + + _file.sections.size() * sizeof(typename T::section); + uint8_t *next = lc + seg->cmdsize; + memset(seg->segname, 0, 16); + seg->vmaddr = 0; + seg->vmsize = _file.sections.back().address + + _file.sections.back().content.size(); + seg->fileoff = _endOfLoadCommands; + seg->filesize = _sectInfo[&_file.sections.back()].fileOffset + + _file.sections.back().content.size() - + _sectInfo[&_file.sections.front()].fileOffset; + seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->nsects = _file.sections.size(); + seg->flags = 0; + if (_swap) + swapStruct(*seg); + typename T::section *sout = reinterpret_cast<typename T::section*> + (lc+sizeof(typename T::command)); + uint32_t relOffset = _startOfRelocations; + uint32_t indirectSymRunningIndex = 0; + for (const Section &sin : _file.sections) { + setString16(sin.sectionName, sout->sectname); + setString16(sin.segmentName, sout->segname); 
+ sout->addr = sin.address; + sout->size = sin.content.size(); + sout->offset = _sectInfo[&sin].fileOffset; + sout->align = llvm::Log2_32(sin.alignment); + sout->reloff = sin.relocations.empty() ? 0 : relOffset; + sout->nreloc = sin.relocations.size(); + sout->flags = sin.type | sin.attributes; + sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex); + sout->reserved2 = indirectSymbolElementSize(sin); + relOffset += sin.relocations.size() * sizeof(any_relocation_info); + if (_swap) + swapStruct(*sout); + ++sout; + } + lc = next; + return llvm::Error::success(); +} + +template <typename T> +llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) { + uint32_t indirectSymRunningIndex = 0; + for (const Segment &seg : _file.segments) { + // Link edit has no sections and a custom range of address, so handle it + // specially. + SegExtraInfo &segInfo = _segInfo[&seg]; + if (seg.name.equals("__LINKEDIT")) { + size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit; + typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); + cmd->cmd = T::LC; + cmd->cmdsize = sizeof(typename T::command); + uint8_t *next = lc + cmd->cmdsize; + setString16("__LINKEDIT", cmd->segname); + cmd->vmaddr = _addressOfLinkEdit; + cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize); + cmd->fileoff = _startOfLinkEdit; + cmd->filesize = linkeditSize; + cmd->initprot = seg.init_access; + cmd->maxprot = seg.max_access; + cmd->nsects = 0; + cmd->flags = 0; + if (_swap) + swapStruct(*cmd); + lc = next; + continue; + } + // Write segment command with trailing sections. 
+ typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); + cmd->cmd = T::LC; + cmd->cmdsize = sizeof(typename T::command) + + segInfo.sections.size() * sizeof(typename T::section); + uint8_t *next = lc + cmd->cmdsize; + setString16(seg.name, cmd->segname); + cmd->vmaddr = seg.address; + cmd->vmsize = seg.size; + cmd->fileoff = segInfo.fileOffset; + cmd->filesize = segInfo.fileSize; + cmd->initprot = seg.init_access; + cmd->maxprot = seg.max_access; + cmd->nsects = segInfo.sections.size(); + cmd->flags = 0; + if (_swap) + swapStruct(*cmd); + typename T::section *sect = reinterpret_cast<typename T::section*> + (lc+sizeof(typename T::command)); + for (const Section *section : segInfo.sections) { + setString16(section->sectionName, sect->sectname); + setString16(section->segmentName, sect->segname); + sect->addr = section->address; + sect->size = section->content.size(); + if (isZeroFillSection(section->type)) + sect->offset = 0; + else + sect->offset = section->address - seg.address + segInfo.fileOffset; + sect->align = llvm::Log2_32(section->alignment); + sect->reloff = 0; + sect->nreloc = 0; + sect->flags = section->type | section->attributes; + sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex); + sect->reserved2 = indirectSymbolElementSize(*section); + if (_swap) + swapStruct(*sect); + ++sect; + } + lc = reinterpret_cast<uint8_t*>(next); + } + return llvm::Error::success(); +} + +static void writeVersionMinLoadCommand(const NormalizedFile &_file, + bool _swap, + uint8_t *&lc) { + if (!_file.hasMinVersionLoadCommand) + return; + version_min_command *vm = reinterpret_cast<version_min_command*>(lc); + switch (_file.os) { + case MachOLinkingContext::OS::unknown: + vm->cmd = _file.minOSVersionKind; + vm->cmdsize = sizeof(version_min_command); + vm->version = _file.minOSverson; + vm->sdk = 0; + break; + case MachOLinkingContext::OS::macOSX: + vm->cmd = LC_VERSION_MIN_MACOSX; + vm->cmdsize = sizeof(version_min_command); + vm->version 
= _file.minOSverson; + vm->sdk = _file.sdkVersion; + break; + case MachOLinkingContext::OS::iOS: + case MachOLinkingContext::OS::iOS_simulator: + vm->cmd = LC_VERSION_MIN_IPHONEOS; + vm->cmdsize = sizeof(version_min_command); + vm->version = _file.minOSverson; + vm->sdk = _file.sdkVersion; + break; + } + if (_swap) + swapStruct(*vm); + lc += sizeof(version_min_command); +} + +llvm::Error MachOFileLayout::writeLoadCommands() { + uint8_t *lc = &_buffer[_startOfLoadCommands]; + if (_file.fileType == llvm::MachO::MH_OBJECT) { + // Object files have one unnamed segment which holds all sections. + if (_is64) { + if (auto ec = writeSingleSegmentLoadCommand<MachO64Trait>(lc)) + return ec; + } else { + if (auto ec = writeSingleSegmentLoadCommand<MachO32Trait>(lc)) + return ec; + } + // Add LC_SYMTAB with symbol table info + symtab_command* st = reinterpret_cast<symtab_command*>(lc); + st->cmd = LC_SYMTAB; + st->cmdsize = sizeof(symtab_command); + st->symoff = _startOfSymbols; + st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + + _file.globalSymbols.size() + _file.undefinedSymbols.size(); + st->stroff = _startOfSymbolStrings; + st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; + if (_swap) + swapStruct(*st); + lc += sizeof(symtab_command); + + // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, + // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS + writeVersionMinLoadCommand(_file, _swap, lc); + + // Add LC_FUNCTION_STARTS if needed. + if (_functionStartsSize != 0) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_FUNCTION_STARTS; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfFunctionStarts; + dl->datasize = _functionStartsSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + + // Add LC_DATA_IN_CODE if requested. 
+ if (_file.generateDataInCodeLoadCommand) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_DATA_IN_CODE; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfDataInCode; + dl->datasize = _dataInCodeSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + } else { + // Final linked images have sections under segments. + if (_is64) { + if (auto ec = writeSegmentLoadCommands<MachO64Trait>(lc)) + return ec; + } else { + if (auto ec = writeSegmentLoadCommands<MachO32Trait>(lc)) + return ec; + } + + // Add LC_ID_DYLIB command for dynamic libraries. + if (_file.fileType == llvm::MachO::MH_DYLIB) { + dylib_command *dc = reinterpret_cast<dylib_command*>(lc); + StringRef path = _file.installName; + uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1); + dc->cmd = LC_ID_DYLIB; + dc->cmdsize = size; + dc->dylib.name = sizeof(dylib_command); // offset + // needs to be some constant value different than the one in LC_LOAD_DYLIB + dc->dylib.timestamp = 1; + dc->dylib.current_version = _file.currentVersion; + dc->dylib.compatibility_version = _file.compatVersion; + if (_swap) + swapStruct(*dc); + memcpy(lc + sizeof(dylib_command), path.begin(), path.size()); + lc[sizeof(dylib_command) + path.size()] = '\0'; + lc += size; + } + + // Add LC_DYLD_INFO_ONLY. + dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc); + di->cmd = LC_DYLD_INFO_ONLY; + di->cmdsize = sizeof(dyld_info_command); + di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0; + di->rebase_size = _rebaseInfo.size(); + di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0; + di->bind_size = _bindingInfo.size(); + di->weak_bind_off = 0; + di->weak_bind_size = 0; + di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0; + di->lazy_bind_size = _lazyBindingInfo.size(); + di->export_off = _exportTrie.size() ? 
_startOfExportTrie : 0; + di->export_size = _exportTrie.size(); + if (_swap) + swapStruct(*di); + lc += sizeof(dyld_info_command); + + // Add LC_SYMTAB with symbol table info. + symtab_command* st = reinterpret_cast<symtab_command*>(lc); + st->cmd = LC_SYMTAB; + st->cmdsize = sizeof(symtab_command); + st->symoff = _startOfSymbols; + st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + + _file.globalSymbols.size() + _file.undefinedSymbols.size(); + st->stroff = _startOfSymbolStrings; + st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; + if (_swap) + swapStruct(*st); + lc += sizeof(symtab_command); + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc); + dst->cmd = LC_DYSYMTAB; + dst->cmdsize = sizeof(dysymtab_command); + dst->ilocalsym = _symbolTableLocalsStartIndex; + dst->nlocalsym = _file.stabsSymbols.size() + + _file.localSymbols.size(); + dst->iextdefsym = _symbolTableGlobalsStartIndex; + dst->nextdefsym = _file.globalSymbols.size(); + dst->iundefsym = _symbolTableUndefinesStartIndex; + dst->nundefsym = _file.undefinedSymbols.size(); + dst->tocoff = 0; + dst->ntoc = 0; + dst->modtaboff = 0; + dst->nmodtab = 0; + dst->extrefsymoff = 0; + dst->nextrefsyms = 0; + dst->indirectsymoff = _startOfIndirectSymbols; + dst->nindirectsyms = _indirectSymbolTableCount; + dst->extreloff = 0; + dst->nextrel = 0; + dst->locreloff = 0; + dst->nlocrel = 0; + if (_swap) + swapStruct(*dst); + lc += sizeof(dysymtab_command); + } + + // If main executable, add LC_LOAD_DYLINKER + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + // Build LC_LOAD_DYLINKER load command. 
+ uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1); + dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc); + dl->cmd = LC_LOAD_DYLINKER; + dl->cmdsize = size; + dl->name = sizeof(dylinker_command); // offset + if (_swap) + swapStruct(*dl); + memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size()); + lc[sizeof(dylinker_command)+dyldPath().size()] = '\0'; + lc += size; + } + + // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, + // LC_VERSION_MIN_TVOS + writeVersionMinLoadCommand(_file, _swap, lc); + + // Add LC_SOURCE_VERSION + { + // Note, using a temporary here to appease UB as we may not be aligned + // enough for a struct containing a uint64_t when emitting a 32-bit binary + source_version_command sv; + sv.cmd = LC_SOURCE_VERSION; + sv.cmdsize = sizeof(source_version_command); + sv.version = _file.sourceVersion; + if (_swap) + swapStruct(sv); + memcpy(lc, &sv, sizeof(source_version_command)); + lc += sizeof(source_version_command); + } + + // If main executable, add LC_MAIN. + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + // Build LC_MAIN load command. 
+ // Note, using a temporary here to appease UB as we may not be aligned + // enough for a struct containing a uint64_t when emitting a 32-bit binary + entry_point_command ep; + ep.cmd = LC_MAIN; + ep.cmdsize = sizeof(entry_point_command); + ep.entryoff = _file.entryAddress - _seg1addr; + ep.stacksize = _file.stackSize; + if (_swap) + swapStruct(ep); + memcpy(lc, &ep, sizeof(entry_point_command)); + lc += sizeof(entry_point_command); + } + + // Add LC_LOAD_DYLIB commands + for (const DependentDylib &dep : _file.dependentDylibs) { + dylib_command* dc = reinterpret_cast<dylib_command*>(lc); + uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + dc->cmd = dep.kind; + dc->cmdsize = size; + dc->dylib.name = sizeof(dylib_command); // offset + // needs to be some constant value different than the one in LC_ID_DYLIB + dc->dylib.timestamp = 2; + dc->dylib.current_version = dep.currentVersion; + dc->dylib.compatibility_version = dep.compatVersion; + if (_swap) + swapStruct(*dc); + memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size()); + lc[sizeof(dylib_command)+dep.path.size()] = '\0'; + lc += size; + } + + // Add LC_RPATH + for (const StringRef &path : _file.rpaths) { + rpath_command *rpc = reinterpret_cast<rpath_command *>(lc); + uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1); + rpc->cmd = LC_RPATH; + rpc->cmdsize = size; + rpc->path = sizeof(rpath_command); // offset + if (_swap) + swapStruct(*rpc); + memcpy(lc+sizeof(rpath_command), path.begin(), path.size()); + lc[sizeof(rpath_command)+path.size()] = '\0'; + lc += size; + } + + // Add LC_FUNCTION_STARTS if needed. 
+ if (_functionStartsSize != 0) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_FUNCTION_STARTS; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfFunctionStarts; + dl->datasize = _functionStartsSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + + // Add LC_DATA_IN_CODE if requested. + if (_file.generateDataInCodeLoadCommand) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_DATA_IN_CODE; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfDataInCode; + dl->datasize = _dataInCodeSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + } + return llvm::Error::success(); +} + +void MachOFileLayout::writeSectionContent() { + for (const Section &s : _file.sections) { + // Copy all section content to output buffer. + if (isZeroFillSection(s.type)) + continue; + if (s.content.empty()) + continue; + uint32_t offset = _sectInfo[&s].fileOffset; + uint8_t *p = &_buffer[offset]; + memcpy(p, &s.content[0], s.content.size()); + p += s.content.size(); + } +} + +void MachOFileLayout::writeRelocations() { + uint32_t relOffset = _startOfRelocations; + for (Section sect : _file.sections) { + for (Relocation r : sect.relocations) { + any_relocation_info* rb = reinterpret_cast<any_relocation_info*>( + &_buffer[relOffset]); + *rb = packRelocation(r, _swap, _bigEndianArch); + relOffset += sizeof(any_relocation_info); + } + } +} + +void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols, + uint32_t &symOffset, uint32_t &strOffset) { + for (const Symbol &sym : symbols) { + if (_is64) { + nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist_64); + } else { + 
nlist* nb = reinterpret_cast<nlist*>(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist); + } + memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size()); + strOffset += sym.name.size(); + _buffer[strOffset++] ='\0'; // Strings in table have nul terminator. + } +} + +void MachOFileLayout::writeFunctionStartsInfo() { + if (!_functionStartsSize) + return; + memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(), + _functionStartsSize); +} + +void MachOFileLayout::writeDataInCodeInfo() { + uint32_t offset = _startOfDataInCode; + for (const DataInCode &entry : _file.dataInCode) { + data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>( + &_buffer[offset]); + dst->offset = entry.offset; + dst->length = entry.length; + dst->kind = entry.kind; + if (_swap) + swapStruct(*dst); + offset += sizeof(data_in_code_entry); + } +} + +void MachOFileLayout::writeSymbolTable() { + // Write symbol table and symbol strings in parallel. + uint32_t symOffset = _startOfSymbols; + uint32_t strOffset = _startOfSymbolStrings; + // Reserve n_strx offset of zero to mean no name. + _buffer[strOffset++] = ' '; + _buffer[strOffset++] = '\0'; + appendSymbols(_file.stabsSymbols, symOffset, strOffset); + appendSymbols(_file.localSymbols, symOffset, strOffset); + appendSymbols(_file.globalSymbols, symOffset, strOffset); + appendSymbols(_file.undefinedSymbols, symOffset, strOffset); + // Write indirect symbol table array. + uint32_t *indirects = reinterpret_cast<uint32_t*> + (&_buffer[_startOfIndirectSymbols]); + if (_file.fileType == llvm::MachO::MH_OBJECT) { + // Object files have sections in same order as input normalized file. 
+ for (const Section §ion : _file.sections) { + for (uint32_t index : section.indirectSymbols) { + if (_swap) + *indirects++ = llvm::sys::getSwappedBytes(index); + else + *indirects++ = index; + } + } + } else { + // Final linked images must sort sections from normalized file. + for (const Segment &seg : _file.segments) { + SegExtraInfo &segInfo = _segInfo[&seg]; + for (const Section *section : segInfo.sections) { + for (uint32_t index : section->indirectSymbols) { + if (_swap) + *indirects++ = llvm::sys::getSwappedBytes(index); + else + *indirects++ = index; + } + } + } + } +} + +void MachOFileLayout::writeRebaseInfo() { + memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size()); +} + +void MachOFileLayout::writeBindingInfo() { + memcpy(&_buffer[_startOfBindingInfo], + _bindingInfo.bytes(), _bindingInfo.size()); +} + +void MachOFileLayout::writeLazyBindingInfo() { + memcpy(&_buffer[_startOfLazyBindingInfo], + _lazyBindingInfo.bytes(), _lazyBindingInfo.size()); +} + +void MachOFileLayout::writeExportInfo() { + memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size()); +} + +void MachOFileLayout::buildLinkEditInfo() { + buildRebaseInfo(); + buildBindInfo(); + buildLazyBindInfo(); + buildExportTrie(); + computeSymbolTableSizes(); + computeFunctionStartsSize(); + computeDataInCodeSize(); +} + +void MachOFileLayout::buildSectionRelocations() { + +} + +void MachOFileLayout::buildRebaseInfo() { + // TODO: compress rebasing info. + for (const RebaseLocation& entry : _file.rebasingInfo) { + _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind); + _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _rebaseInfo.append_uleb128(entry.segOffset); + _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1); + } + _rebaseInfo.append_byte(REBASE_OPCODE_DONE); + _rebaseInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::buildBindInfo() { + // TODO: compress bind info. 
+ uint64_t lastAddend = 0; + int lastOrdinal = 0x80000000; + StringRef lastSymbolName; + BindType lastType = (BindType)0; + Hex32 lastSegOffset = ~0U; + uint8_t lastSegIndex = (uint8_t)~0U; + for (const BindLocation& entry : _file.bindingInfo) { + if (entry.ordinal != lastOrdinal) { + if (entry.ordinal <= 0) + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | + (entry.ordinal & BIND_IMMEDIATE_MASK)); + else if (entry.ordinal <= BIND_IMMEDIATE_MASK) + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + entry.ordinal); + else { + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + _bindingInfo.append_uleb128(entry.ordinal); + } + lastOrdinal = entry.ordinal; + } + + if (lastSymbolName != entry.symbolName) { + _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _bindingInfo.append_string(entry.symbolName); + lastSymbolName = entry.symbolName; + } + + if (lastType != entry.kind) { + _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); + lastType = entry.kind; + } + + if (lastSegIndex != entry.segIndex || lastSegOffset != entry.segOffset) { + _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _bindingInfo.append_uleb128(entry.segOffset); + lastSegIndex = entry.segIndex; + lastSegOffset = entry.segOffset; + } + if (entry.addend != lastAddend) { + _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); + _bindingInfo.append_sleb128(entry.addend); + lastAddend = entry.addend; + } + _bindingInfo.append_byte(BIND_OPCODE_DO_BIND); + } + _bindingInfo.append_byte(BIND_OPCODE_DONE); + _bindingInfo.align(_is64 ? 
8 : 4); +} + +void MachOFileLayout::buildLazyBindInfo() { + for (const BindLocation& entry : _file.lazyBindingInfo) { + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _lazyBindingInfo.append_uleb128(entry.segOffset); + if (entry.ordinal <= 0) + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | + (entry.ordinal & BIND_IMMEDIATE_MASK)); + else if (entry.ordinal <= BIND_IMMEDIATE_MASK) + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + entry.ordinal); + else { + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + _lazyBindingInfo.append_uleb128(entry.ordinal); + } + // FIXME: We need to | the opcode here with flags. + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _lazyBindingInfo.append_string(entry.symbolName); + _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); + _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); + } + _lazyBindingInfo.align(_is64 ? 8 : 4); +} + +void TrieNode::addSymbol(const Export& entry, + BumpPtrAllocator &allocator, + std::vector<TrieNode*> &allNodes) { + StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); + for (TrieEdge &edge : _children) { + StringRef edgeStr = edge._subString; + if (partialStr.startswith(edgeStr)) { + // Already have matching edge, go down that path. + edge._child->addSymbol(entry, allocator, allNodes); + return; + } + // See if string has commmon prefix with existing edge. 
+ for (int n=edgeStr.size()-1; n > 0; --n) { + if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) { + // Splice in new node: was A -> C, now A -> B -> C + StringRef bNodeStr = edge._child->_cummulativeString; + bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator); + auto *bNode = new (allocator) TrieNode(bNodeStr); + allNodes.push_back(bNode); + TrieNode* cNode = edge._child; + StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator); + StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator); + DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() + << "splice in TrieNode('" << bNodeStr + << "') between edge '" + << abEdgeStr << "' and edge='" + << bcEdgeStr<< "'\n"); + TrieEdge& abEdge = edge; + abEdge._subString = abEdgeStr; + abEdge._child = bNode; + auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode); + bNode->_children.insert(bNode->_children.end(), bcEdge); + bNode->addSymbol(entry, allocator, allNodes); + return; + } + } + } + if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + assert(entry.otherOffset != 0); + } + if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { + assert(entry.otherOffset != 0); + } + // No commonality with any existing child, make a new edge. 
+ auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator)); + auto *newEdge = new (allocator) TrieEdge(partialStr, newNode); + _children.insert(_children.end(), newEdge); + DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() + << "new TrieNode('" << entry.name << "') with edge '" + << partialStr << "' from node='" + << _cummulativeString << "'\n"); + newNode->_address = entry.offset; + newNode->_flags = entry.flags | entry.kind; + newNode->_other = entry.otherOffset; + if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty()) + newNode->_importedName = entry.otherName.copy(allocator); + newNode->_hasExportInfo = true; + allNodes.push_back(newNode); +} + +void TrieNode::addOrderedNodes(const Export& entry, + std::vector<TrieNode*> &orderedNodes) { + if (!_ordered) { + orderedNodes.push_back(this); + _ordered = true; + } + + StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); + for (TrieEdge &edge : _children) { + StringRef edgeStr = edge._subString; + if (partialStr.startswith(edgeStr)) { + // Already have matching edge, go down that path. + edge._child->addOrderedNodes(entry, orderedNodes); + return; + } + } +} + +bool TrieNode::updateOffset(uint32_t& offset) { + uint32_t nodeSize = 1; // Length when no export info + if (_hasExportInfo) { + if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + nodeSize = llvm::getULEB128Size(_flags); + nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal. + nodeSize += _importedName.size(); + ++nodeSize; // Trailing zero in imported name. + } else { + nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address); + if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + nodeSize += llvm::getULEB128Size(_other); + } + // Overall node size so far is uleb128 of export info + actual export info. + nodeSize += llvm::getULEB128Size(nodeSize); + } + // Compute size of all child edges. + ++nodeSize; // Byte for number of chidren. 
+ for (TrieEdge &edge : _children) { + nodeSize += edge._subString.size() + 1 // String length. + + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len. + } + // On input, 'offset' is new prefered location for this node. + bool result = (_trieOffset != offset); + // Store new location in node object for use by parents. + _trieOffset = offset; + // Update offset for next iteration. + offset += nodeSize; + // Return true if _trieOffset was changed. + return result; +} + +void TrieNode::appendToByteBuffer(ByteBuffer &out) { + if (_hasExportInfo) { + if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + if (!_importedName.empty()) { + // nodes with re-export info: size, flags, ordinal, import-name + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_other) + + _importedName.size() + 1; + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_other); + out.append_string(_importedName); + } else { + // nodes without re-export info: size, flags, ordinal, empty-string + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_other) + 1; + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_other); + out.append_byte(0); + } + } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { + // Nodes with export info: size, flags, address, other + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_address) + + llvm::getULEB128Size(_other); + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_address); + out.append_uleb128(_other); + } else { + // Nodes with export info: size, flags, address + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_address); + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_address); + } + } else { + // Node with no export info. 
+ uint32_t nodeSize = 0; + out.append_byte(nodeSize); + } + // Add number of children. + assert(_children.size() < 256); + out.append_byte(_children.size()); + // Append each child edge substring and node offset. + for (TrieEdge &edge : _children) { + out.append_string(edge._subString); + out.append_uleb128(edge._child->_trieOffset); + } +} + +void MachOFileLayout::buildExportTrie() { + if (_file.exportInfo.empty()) + return; + + // For all temporary strings and objects used building trie. + BumpPtrAllocator allocator; + + // Build trie of all exported symbols. + auto *rootNode = new (allocator) TrieNode(StringRef()); + std::vector<TrieNode*> allNodes; + allNodes.reserve(_file.exportInfo.size()*2); + allNodes.push_back(rootNode); + for (const Export& entry : _file.exportInfo) { + rootNode->addSymbol(entry, allocator, allNodes); + } + + std::vector<TrieNode*> orderedNodes; + orderedNodes.reserve(allNodes.size()); + + for (const Export& entry : _file.exportInfo) + rootNode->addOrderedNodes(entry, orderedNodes); + + // Assign each node in the vector an offset in the trie stream, iterating + // until all uleb128 sizes have stabilized. + bool more; + do { + uint32_t offset = 0; + more = false; + for (TrieNode* node : orderedNodes) { + if (node->updateOffset(offset)) + more = true; + } + } while (more); + + // Serialize trie to ByteBuffer. + for (TrieNode* node : orderedNodes) { + node->appendToByteBuffer(_exportTrie); + } + _exportTrie.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::computeSymbolTableSizes() { + // MachO symbol tables have three ranges: locals, globals, and undefines + const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist)); + _symbolTableSize = nlistSize * (_file.stabsSymbols.size() + + _file.localSymbols.size() + + _file.globalSymbols.size() + + _file.undefinedSymbols.size()); + // Always reserve 1-byte for the empty string and 1-byte for its terminator. 
+ _symbolStringPoolSize = 2; + for (const Symbol &sym : _file.stabsSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.localSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.globalSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.undefinedSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + _symbolTableLocalsStartIndex = 0; + _symbolTableGlobalsStartIndex = _file.stabsSymbols.size() + + _file.localSymbols.size(); + _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex + + _file.globalSymbols.size(); + + _indirectSymbolTableCount = 0; + for (const Section § : _file.sections) { + _indirectSymbolTableCount += sect.indirectSymbols.size(); + } +} + +void MachOFileLayout::computeFunctionStartsSize() { + _functionStartsSize = _file.functionStarts.size(); +} + +void MachOFileLayout::computeDataInCodeSize() { + _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry); +} + +void MachOFileLayout::writeLinkEditContent() { + if (_file.fileType == llvm::MachO::MH_OBJECT) { + writeRelocations(); + writeFunctionStartsInfo(); + writeDataInCodeInfo(); + writeSymbolTable(); + } else { + writeRebaseInfo(); + writeBindingInfo(); + writeLazyBindingInfo(); + // TODO: add weak binding info + writeExportInfo(); + writeFunctionStartsInfo(); + writeDataInCodeInfo(); + writeSymbolTable(); + } +} + +llvm::Error MachOFileLayout::writeBinary(StringRef path) { + // Check for pending error from constructor. + if (_ec) + return llvm::errorCodeToError(_ec); + // Create FileOutputBuffer with calculated size. 
+ unsigned flags = 0; + if (_file.fileType != llvm::MachO::MH_OBJECT) + flags = llvm::FileOutputBuffer::F_executable; + ErrorOr<std::unique_ptr<llvm::FileOutputBuffer>> fobOrErr = + llvm::FileOutputBuffer::create(path, size(), flags); + if (std::error_code ec = fobOrErr.getError()) + return llvm::errorCodeToError(ec); + std::unique_ptr<llvm::FileOutputBuffer> &fob = *fobOrErr; + // Write content. + _buffer = fob->getBufferStart(); + writeMachHeader(); + if (auto ec = writeLoadCommands()) + return ec; + writeSectionContent(); + writeLinkEditContent(); + fob->commit(); + + return llvm::Error::success(); +} + +/// Takes in-memory normalized view and writes a mach-o object file. +llvm::Error writeBinary(const NormalizedFile &file, StringRef path) { + MachOFileLayout layout(file); + return layout.writeBinary(path); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp new file mode 100644 index 000000000000..ddd3259842e2 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp @@ -0,0 +1,1599 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory Atoms to in-memory normalized mach-o. 
+/// +/// +------------+ +/// | normalized | +/// +------------+ +/// ^ +/// | +/// | +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "MachONormalizedFile.h" +#include "ArchHandler.h" +#include "DebugInfo.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include <map> +#include <system_error> +#include <unordered_set> + +using llvm::StringRef; +using llvm::isa; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; +using namespace lld; + +namespace { + +struct AtomInfo { + const DefinedAtom *atom; + uint64_t offsetInSection; +}; + +struct SectionInfo { + SectionInfo(StringRef seg, StringRef sect, SectionType type, + const MachOLinkingContext &ctxt, uint32_t attr, + bool relocsToDefinedCanBeImplicit); + + StringRef segmentName; + StringRef sectionName; + SectionType type; + uint32_t attributes; + uint64_t address; + uint64_t size; + uint16_t alignment; + + /// If this is set, the any relocs in this section which point to defined + /// addresses can be implicitly generated. This is the case for the + /// __eh_frame section where references to the function can be implicit if the + /// function is defined. 
+ bool relocsToDefinedCanBeImplicit; + + + std::vector<AtomInfo> atomsAndOffsets; + uint32_t normalizedSectionIndex; + uint32_t finalSectionIndex; +}; + +SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t, + const MachOLinkingContext &ctxt, uint32_t attrs, + bool relocsToDefinedCanBeImplicit) + : segmentName(sg), sectionName(sct), type(t), attributes(attrs), + address(0), size(0), alignment(1), + relocsToDefinedCanBeImplicit(relocsToDefinedCanBeImplicit), + normalizedSectionIndex(0), finalSectionIndex(0) { + uint16_t align = 1; + if (ctxt.sectionAligned(segmentName, sectionName, align)) { + alignment = align; + } +} + +struct SegmentInfo { + SegmentInfo(StringRef name); + + StringRef name; + uint64_t address; + uint64_t size; + uint32_t init_access; + uint32_t max_access; + std::vector<SectionInfo*> sections; + uint32_t normalizedSegmentIndex; +}; + +SegmentInfo::SegmentInfo(StringRef n) + : name(n), address(0), size(0), init_access(0), max_access(0), + normalizedSegmentIndex(0) { +} + +class Util { +public: + Util(const MachOLinkingContext &ctxt) + : _ctx(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr), + _hasTLVDescriptors(false), _subsectionsViaSymbols(true) {} + ~Util(); + + void processDefinedAtoms(const lld::File &atomFile); + void processAtomAttributes(const DefinedAtom *atom); + void assignAtomToSection(const DefinedAtom *atom); + void organizeSections(); + void assignAddressesToSections(const NormalizedFile &file); + uint32_t fileFlags(); + void copySegmentInfo(NormalizedFile &file); + void copySectionInfo(NormalizedFile &file); + void updateSectionInfo(NormalizedFile &file); + void buildAtomToAddressMap(); + llvm::Error synthesizeDebugNotes(NormalizedFile &file); + llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file); + void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file); + void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file); + void addExportInfo(const lld::File &, 
NormalizedFile &file); + void addSectionRelocs(const lld::File &, NormalizedFile &file); + void addFunctionStarts(const lld::File &, NormalizedFile &file); + void buildDataInCodeArray(const lld::File &, NormalizedFile &file); + void addDependentDylibs(const lld::File &, NormalizedFile &file); + void copyEntryPointAddress(NormalizedFile &file); + void copySectionContent(NormalizedFile &file); + + bool allSourceFilesHaveMinVersions() const { + return _allSourceFilesHaveMinVersions; + } + + uint32_t minVersion() const { + return _minVersion; + } + + LoadCommandType minVersionCommandType() const { + return _minVersionCommandType; + } + +private: + typedef std::map<DefinedAtom::ContentType, SectionInfo*> TypeToSection; + typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress; + + struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; }; + typedef llvm::StringMap<DylibInfo> DylibPathToInfo; + + SectionInfo *sectionForAtom(const DefinedAtom*); + SectionInfo *getRelocatableSection(DefinedAtom::ContentType type); + SectionInfo *getFinalSection(DefinedAtom::ContentType type); + void appendAtom(SectionInfo *sect, const DefinedAtom *atom); + SegmentInfo *segmentForName(StringRef segName); + void layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr); + void layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &); + void copySectionContent(SectionInfo *si, ContentBytes &content); + uint16_t descBits(const DefinedAtom* atom); + int dylibOrdinal(const SharedLibraryAtom *sa); + void segIndexForSection(const SectionInfo *sect, + uint8_t &segmentIndex, uint64_t &segmentStartAddr); + const Atom *targetOfLazyPointer(const DefinedAtom *lpAtom); + const Atom *targetOfStub(const DefinedAtom *stubAtom); + llvm::Error getSymbolTableRegion(const DefinedAtom* atom, + bool &inGlobalsRegion, + SymbolScope &symbolScope); + void appendSection(SectionInfo *si, NormalizedFile &file); + uint32_t sectionIndexForAtom(const Atom *atom); + + typedef llvm::DenseMap<const Atom*, 
uint32_t> AtomToIndex; + struct AtomAndIndex { const Atom *atom; uint32_t index; SymbolScope scope; }; + struct AtomSorter { + bool operator()(const AtomAndIndex &left, const AtomAndIndex &right); + }; + struct SegmentSorter { + bool operator()(const SegmentInfo *left, const SegmentInfo *right); + static unsigned weight(const SegmentInfo *); + }; + struct TextSectionSorter { + bool operator()(const SectionInfo *left, const SectionInfo *right); + static unsigned weight(const SectionInfo *); + }; + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + llvm::BumpPtrAllocator _allocator; + std::vector<SectionInfo*> _sectionInfos; + std::vector<SegmentInfo*> _segmentInfos; + TypeToSection _sectionMap; + std::vector<SectionInfo*> _customSections; + AtomToAddress _atomToAddress; + DylibPathToInfo _dylibInfo; + const DefinedAtom *_entryAtom; + AtomToIndex _atomToSymbolIndex; + std::vector<const Atom *> _machHeaderAliasAtoms; + bool _hasTLVDescriptors; + bool _subsectionsViaSymbols; + bool _allSourceFilesHaveMinVersions = true; + LoadCommandType _minVersionCommandType = (LoadCommandType)0; + uint32_t _minVersion = 0; + std::vector<lld::mach_o::Stab> _stabs; +}; + +Util::~Util() { + // The SectionInfo structs are BumpPtr allocated, but atomsAndOffsets needs + // to be deleted. + for (SectionInfo *si : _sectionInfos) { + // clear() destroys vector elements, but does not deallocate. + // Instead use swap() to deallocate vector buffer. + std::vector<AtomInfo> empty; + si->atomsAndOffsets.swap(empty); + } + // The SegmentInfo structs are BumpPtr allocated, but sections needs + // to be deleted. 
+ for (SegmentInfo *sgi : _segmentInfos) { + std::vector<SectionInfo*> empty2; + sgi->sections.swap(empty2); + } +} + +SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + SectionAttr sectionAttrs; + bool relocsToDefinedCanBeImplicit; + + // Use same table used by when parsing .o files. + relocatableSectionInfoForContentType(type, segmentName, sectionName, + sectionType, sectionAttrs, + relocsToDefinedCanBeImplicit); + // If we already have a SectionInfo with this name, re-use it. + // This can happen if two ContentType map to the same mach-o section. + for (auto sect : _sectionMap) { + if (sect.second->sectionName.equals(sectionName) && + sect.second->segmentName.equals(segmentName)) { + return sect.second; + } + } + // Otherwise allocate new SectionInfo object. + auto *sect = new (_allocator) + SectionInfo(segmentName, sectionName, sectionType, _ctx, sectionAttrs, + relocsToDefinedCanBeImplicit); + _sectionInfos.push_back(sect); + _sectionMap[type] = sect; + return sect; +} + +#define ENTRY(seg, sect, type, atomType) \ + {seg, sect, type, DefinedAtom::atomType } + +struct MachOFinalSectionFromAtomType { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + DefinedAtom::ContentType atomType; +}; + +const MachOFinalSectionFromAtomType sectsToAtomType[] = { + ENTRY("__TEXT", "__text", S_REGULAR, typeCode), + ENTRY("__TEXT", "__text", S_REGULAR, typeMachHeader), + ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), + ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), + ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), + ENTRY("__TEXT", "__const", S_4BYTE_LITERALS, typeLiteral4), + ENTRY("__TEXT", "__const", S_8BYTE_LITERALS, typeLiteral8), + ENTRY("__TEXT", "__const", S_16BYTE_LITERALS, typeLiteral16), + ENTRY("__TEXT", "__stubs", S_SYMBOL_STUBS, typeStub), + ENTRY("__TEXT", "__stub_helper", S_REGULAR, typeStubHelper), + 
ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), + ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), + ENTRY("__TEXT", "__unwind_info", S_REGULAR, typeProcessedUnwindInfo), + ENTRY("__DATA", "__data", S_REGULAR, typeData), + ENTRY("__DATA", "__const", S_REGULAR, typeConstData), + ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), + ENTRY("__DATA", "__la_symbol_ptr", S_LAZY_SYMBOL_POINTERS, + typeLazyPointer), + ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, + typeInitializerPtr), + ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, + typeTerminatorPtr), + ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__nl_symbol_ptr", S_NON_LAZY_SYMBOL_POINTERS, + typeNonLazyPointer), + ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, + typeThunkTLV), + ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, + typeTLVInitialData), + ENTRY("__DATA", "__thread_ptrs", S_THREAD_LOCAL_VARIABLE_POINTERS, + typeTLVInitializerPtr), + ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, + typeTLVInitialZeroFill), + ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), + ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), +}; +#undef ENTRY + +SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) { + for (auto &p : sectsToAtomType) { + if (p.atomType != atomType) + continue; + SectionAttr sectionAttrs = 0; + switch (atomType) { + case DefinedAtom::typeMachHeader: + case DefinedAtom::typeCode: + case DefinedAtom::typeStub: + case DefinedAtom::typeStubHelper: + sectionAttrs = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS; + break; + case DefinedAtom::typeThunkTLV: + _hasTLVDescriptors = true; + break; + default: + break; + } + // If we already have a SectionInfo with this name, re-use it. + // This can happen if two ContentType map to the same mach-o section. 
+ for (auto sect : _sectionMap) { + if (sect.second->sectionName.equals(p.sectionName) && + sect.second->segmentName.equals(p.segmentName)) { + return sect.second; + } + } + // Otherwise allocate new SectionInfo object. + auto *sect = new (_allocator) SectionInfo( + p.segmentName, p.sectionName, p.sectionType, _ctx, sectionAttrs, + /* relocsToDefinedCanBeImplicit */ false); + _sectionInfos.push_back(sect); + _sectionMap[atomType] = sect; + return sect; + } + llvm_unreachable("content type not yet supported"); +} + +SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) { + if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) { + // Section for this atom is derived from content type. + DefinedAtom::ContentType type = atom->contentType(); + auto pos = _sectionMap.find(type); + if ( pos != _sectionMap.end() ) + return pos->second; + bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + return rMode ? getRelocatableSection(type) : getFinalSection(type); + } else { + // This atom needs to be in a custom section. + StringRef customName = atom->customSectionName(); + // Look to see if we have already allocated the needed custom section. + for(SectionInfo *sect : _customSections) { + const DefinedAtom *firstAtom = sect->atomsAndOffsets.front().atom; + if (firstAtom->customSectionName().equals(customName)) { + return sect; + } + } + // Not found, so need to create a new custom section. 
+ size_t seperatorIndex = customName.find('/'); + assert(seperatorIndex != StringRef::npos); + StringRef segName = customName.slice(0, seperatorIndex); + StringRef sectName = customName.drop_front(seperatorIndex + 1); + auto *sect = + new (_allocator) SectionInfo(segName, sectName, S_REGULAR, _ctx, + 0, /* relocsToDefinedCanBeImplicit */ false); + _customSections.push_back(sect); + _sectionInfos.push_back(sect); + return sect; + } +} + +void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) { + // Figure out offset for atom in this section given alignment constraints. + uint64_t offset = sect->size; + DefinedAtom::Alignment atomAlign = atom->alignment(); + uint64_t align = atomAlign.value; + uint64_t requiredModulus = atomAlign.modulus; + uint64_t currentModulus = (offset % align); + if ( currentModulus != requiredModulus ) { + if ( requiredModulus > currentModulus ) + offset += requiredModulus-currentModulus; + else + offset += align+requiredModulus-currentModulus; + } + // Record max alignment of any atom in this section. + if (align > sect->alignment) + sect->alignment = atomAlign.value; + // Assign atom to this section with this offset. + AtomInfo ai = {atom, offset}; + sect->atomsAndOffsets.push_back(ai); + // Update section size to include this atom. + sect->size = offset + atom->size(); +} + +void Util::processDefinedAtoms(const lld::File &atomFile) { + for (const DefinedAtom *atom : atomFile.defined()) { + processAtomAttributes(atom); + assignAtomToSection(atom); + } +} + +void Util::processAtomAttributes(const DefinedAtom *atom) { + if (auto *machoFile = dyn_cast<mach_o::MachOFile>(&atom->file())) { + // If the file doesn't use subsections via symbols, then make sure we don't + // add that flag to the final output file if we have a relocatable file. + if (!machoFile->subsectionsViaSymbols()) + _subsectionsViaSymbols = false; + + // All the source files must have min versions for us to output an object + // file with a min version. 
+ if (auto v = machoFile->minVersion()) + _minVersion = std::max(_minVersion, v); + else + _allSourceFilesHaveMinVersions = false; + + // If we don't have a platform load command, but one of the source files + // does, then take the one from the file. + if (!_minVersionCommandType) + if (auto v = machoFile->minVersionLoadCommandKind()) + _minVersionCommandType = v; + } +} + +void Util::assignAtomToSection(const DefinedAtom *atom) { + if (atom->contentType() == DefinedAtom::typeMachHeader) { + _machHeaderAliasAtoms.push_back(atom); + // Assign atom to this section with this offset. + AtomInfo ai = {atom, 0}; + sectionForAtom(atom)->atomsAndOffsets.push_back(ai); + } else if (atom->contentType() == DefinedAtom::typeDSOHandle) + _machHeaderAliasAtoms.push_back(atom); + else + appendAtom(sectionForAtom(atom), atom); +} + +SegmentInfo *Util::segmentForName(StringRef segName) { + for (SegmentInfo *si : _segmentInfos) { + if ( si->name.equals(segName) ) + return si; + } + auto *info = new (_allocator) SegmentInfo(segName); + + // Set the initial segment protection. + if (segName.equals("__TEXT")) + info->init_access = VM_PROT_READ | VM_PROT_EXECUTE; + else if (segName.equals("__PAGEZERO")) + info->init_access = 0; + else if (segName.equals("__LINKEDIT")) + info->init_access = VM_PROT_READ; + else { + // All others default to read-write + info->init_access = VM_PROT_READ | VM_PROT_WRITE; + } + + // Set max segment protection + // Note, its overkill to use a switch statement here, but makes it so much + // easier to use switch coverage to catch new cases. 
+ switch (_ctx.os()) { + case lld::MachOLinkingContext::OS::unknown: + case lld::MachOLinkingContext::OS::macOSX: + case lld::MachOLinkingContext::OS::iOS_simulator: + if (segName.equals("__PAGEZERO")) { + info->max_access = 0; + break; + } + // All others default to all + info->max_access = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; + break; + case lld::MachOLinkingContext::OS::iOS: + // iPhoneOS always uses same protection for max and initial + info->max_access = info->init_access; + break; + } + _segmentInfos.push_back(info); + return info; +} + +unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) { + return llvm::StringSwitch<unsigned>(seg->name) + .Case("__PAGEZERO", 1) + .Case("__TEXT", 2) + .Case("__DATA", 3) + .Default(100); +} + +bool Util::SegmentSorter::operator()(const SegmentInfo *left, + const SegmentInfo *right) { + return (weight(left) < weight(right)); +} + +unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) { + return llvm::StringSwitch<unsigned>(sect->sectionName) + .Case("__text", 1) + .Case("__stubs", 2) + .Case("__stub_helper", 3) + .Case("__const", 4) + .Case("__cstring", 5) + .Case("__unwind_info", 98) + .Case("__eh_frame", 99) + .Default(10); +} + +bool Util::TextSectionSorter::operator()(const SectionInfo *left, + const SectionInfo *right) { + return (weight(left) < weight(right)); +} + +void Util::organizeSections() { + // NOTE!: Keep this in sync with assignAddressesToSections. + switch (_ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + // Main executables, need a zero-page segment + segmentForName("__PAGEZERO"); + // Fall into next case. + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + // All dynamic code needs TEXT segment to hold the load commands. + segmentForName("__TEXT"); + break; + default: + break; + } + segmentForName("__LINKEDIT"); + + // Group sections into segments. 
+ for (SectionInfo *si : _sectionInfos) { + SegmentInfo *seg = segmentForName(si->segmentName); + seg->sections.push_back(si); + } + // Sort segments. + std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter()); + + // Sort sections within segments. + for (SegmentInfo *seg : _segmentInfos) { + if (seg->name.equals("__TEXT")) { + std::sort(seg->sections.begin(), seg->sections.end(), + TextSectionSorter()); + } + } + + // Record final section indexes. + uint32_t segmentIndex = 0; + uint32_t sectionIndex = 1; + for (SegmentInfo *seg : _segmentInfos) { + seg->normalizedSegmentIndex = segmentIndex++; + for (SectionInfo *sect : seg->sections) + sect->finalSectionIndex = sectionIndex++; + } +} + +void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) { + seg->address = addr; + for (SectionInfo *sect : seg->sections) { + sect->address = llvm::alignTo(addr, sect->alignment); + addr = sect->address + sect->size; + } + seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); +} + +// __TEXT segment lays out backwards so padding is at front after load commands. +void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg, + uint64_t &addr) { + seg->address = addr; + // Walks sections starting at end to calculate padding for start. + int64_t taddr = 0; + for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) { + SectionInfo *sect = *it; + taddr -= sect->size; + taddr = taddr & (0 - sect->alignment); + } + int64_t padding = taddr - hlcSize; + while (padding < 0) + padding += _ctx.pageSize(); + // Start assigning section address starting at padded offset. + addr += (padding + hlcSize); + for (SectionInfo *sect : seg->sections) { + sect->address = llvm::alignTo(addr, sect->alignment); + addr = sect->address + sect->size; + } + seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); +} + +void Util::assignAddressesToSections(const NormalizedFile &file) { + // NOTE!: Keep this in sync with organizeSections. 
+ size_t hlcSize = headerAndLoadCommandsSize(file); + uint64_t address = 0; + for (SegmentInfo *seg : _segmentInfos) { + if (seg->name.equals("__PAGEZERO")) { + seg->size = _ctx.pageZeroSize(); + address += seg->size; + } + else if (seg->name.equals("__TEXT")) { + // _ctx.baseAddress() == 0 implies it was either unspecified or + // pageZeroSize is also 0. In either case resetting address is safe. + address = _ctx.baseAddress() ? _ctx.baseAddress() : address; + layoutSectionsInTextSegment(hlcSize, seg, address); + } else + layoutSectionsInSegment(seg, address); + + address = llvm::alignTo(address, _ctx.pageSize()); + } + DEBUG_WITH_TYPE("WriterMachO-norm", + llvm::dbgs() << "assignAddressesToSections()\n"; + for (SegmentInfo *sgi : _segmentInfos) { + llvm::dbgs() << " address=" << llvm::format("0x%08llX", sgi->address) + << ", size=" << llvm::format("0x%08llX", sgi->size) + << ", segment-name='" << sgi->name + << "'\n"; + for (SectionInfo *si : sgi->sections) { + llvm::dbgs()<< " addr=" << llvm::format("0x%08llX", si->address) + << ", size=" << llvm::format("0x%08llX", si->size) + << ", section-name='" << si->sectionName + << "\n"; + } + } + ); +} + +void Util::copySegmentInfo(NormalizedFile &file) { + for (SegmentInfo *sgi : _segmentInfos) { + Segment seg; + seg.name = sgi->name; + seg.address = sgi->address; + seg.size = sgi->size; + seg.init_access = sgi->init_access; + seg.max_access = sgi->max_access; + file.segments.push_back(seg); + } +} + +void Util::appendSection(SectionInfo *si, NormalizedFile &file) { + // Add new empty section to end of file.sections. + Section temp; + file.sections.push_back(std::move(temp)); + Section* normSect = &file.sections.back(); + // Copy fields to normalized section. 
+ normSect->segmentName = si->segmentName; + normSect->sectionName = si->sectionName; + normSect->type = si->type; + normSect->attributes = si->attributes; + normSect->address = si->address; + normSect->alignment = si->alignment; + // Record where normalized section is. + si->normalizedSectionIndex = file.sections.size()-1; +} + +void Util::copySectionContent(NormalizedFile &file) { + const bool r = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + + // Utility function for ArchHandler to find address of atom in output file. + auto addrForAtom = [&] (const Atom &atom) -> uint64_t { + auto pos = _atomToAddress.find(&atom); + assert(pos != _atomToAddress.end()); + return pos->second; + }; + + auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t { + for (const SectionInfo *sectInfo : _sectionInfos) + for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) + if (atomInfo.atom == &atom) + return sectInfo->address; + llvm_unreachable("atom not assigned to section"); + }; + + for (SectionInfo *si : _sectionInfos) { + Section *normSect = &file.sections[si->normalizedSectionIndex]; + if (isZeroFillSection(si->type)) { + const uint8_t *empty = nullptr; + normSect->content = llvm::makeArrayRef(empty, si->size); + continue; + } + // Copy content from atoms to content buffer for section. 
+ llvm::MutableArrayRef<uint8_t> sectionContent; + if (si->size) { + uint8_t *sectContent = file.ownedAllocations.Allocate<uint8_t>(si->size); + sectionContent = llvm::MutableArrayRef<uint8_t>(sectContent, si->size); + normSect->content = sectionContent; + } + for (AtomInfo &ai : si->atomsAndOffsets) { + if (!ai.atom->size()) { + assert(ai.atom->begin() == ai.atom->end() && + "Cannot have references without content"); + continue; + } + auto atomContent = sectionContent.slice(ai.offsetInSection, + ai.atom->size()); + _archHandler.generateAtomContent(*ai.atom, r, addrForAtom, + sectionAddrForAtom, _ctx.baseAddress(), + atomContent); + } + } +} + +void Util::copySectionInfo(NormalizedFile &file) { + file.sections.reserve(_sectionInfos.size()); + // Write sections grouped by segment. + for (SegmentInfo *sgi : _segmentInfos) { + for (SectionInfo *si : sgi->sections) { + appendSection(si, file); + } + } +} + +void Util::updateSectionInfo(NormalizedFile &file) { + file.sections.reserve(_sectionInfos.size()); + // sections grouped by segment. 
+ for (SegmentInfo *sgi : _segmentInfos) { + Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex]; + normSeg->address = sgi->address; + normSeg->size = sgi->size; + for (SectionInfo *si : sgi->sections) { + Section *normSect = &file.sections[si->normalizedSectionIndex]; + normSect->address = si->address; + } + } +} + +void Util::copyEntryPointAddress(NormalizedFile &nFile) { + if (!_entryAtom) { + nFile.entryAddress = 0; + return; + } + + if (_ctx.outputTypeHasEntry()) { + if (_archHandler.isThumbFunction(*_entryAtom)) + nFile.entryAddress = (_atomToAddress[_entryAtom] | 1); + else + nFile.entryAddress = _atomToAddress[_entryAtom]; + } +} + +void Util::buildAtomToAddressMap() { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << "assign atom addresses:\n"); + const bool lookForEntry = _ctx.outputTypeHasEntry(); + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + _atomToAddress[info.atom] = sect->address + info.offsetInSection; + if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) && + (info.atom->size() != 0) && + info.atom->name() == _ctx.entrySymbolName()) { + _entryAtom = info.atom; + } + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016X", _atomToAddress[info.atom]) + << llvm::format(" 0x%09lX", info.atom) + << ", file=#" + << info.atom->file().ordinal() + << ", atom=#" + << info.atom->ordinal() + << ", name=" + << info.atom->name() + << ", type=" + << info.atom->contentType() + << "\n"); + } + } + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << "assign header alias atom addresses:\n"); + for (const Atom *atom : _machHeaderAliasAtoms) { + _atomToAddress[atom] = _ctx.baseAddress(); +#ifndef NDEBUG + if (auto *definedAtom = dyn_cast<DefinedAtom>(atom)) { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016X", _atomToAddress[atom]) + << llvm::format(" 0x%09lX", atom) + << ", file=#" 
+ << definedAtom->file().ordinal() + << ", atom=#" + << definedAtom->ordinal() + << ", name=" + << definedAtom->name() + << ", type=" + << definedAtom->contentType() + << "\n"); + } else { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016X", _atomToAddress[atom]) + << " atom=" << atom + << " name=" << atom->name() << "\n"); + } +#endif + } +} + +llvm::Error Util::synthesizeDebugNotes(NormalizedFile &file) { + + // Bail out early if we don't need to generate a debug map. + if (_ctx.debugInfoMode() == MachOLinkingContext::DebugInfoMode::noDebugMap) + return llvm::Error::success(); + + std::vector<const DefinedAtom*> atomsNeedingDebugNotes; + std::set<const mach_o::MachOFile*> filesWithStabs; + bool objFileHasDwarf = false; + const File *objFile = nullptr; + + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + if (const DefinedAtom *atom = dyn_cast<DefinedAtom>(info.atom)) { + + // FIXME: No stabs/debug-notes for symbols that wouldn't be in the + // symbol table. + // FIXME: No stabs/debug-notes for kernel dtrace probes. + + if (atom->contentType() == DefinedAtom::typeCFI || + atom->contentType() == DefinedAtom::typeCString) + continue; + + // Whenever we encounter a new file, update the 'objfileHasDwarf' flag. + if (&info.atom->file() != objFile) { + objFileHasDwarf = false; + if (const mach_o::MachOFile *atomFile = + dyn_cast<mach_o::MachOFile>(&info.atom->file())) { + if (atomFile->debugInfo()) { + if (isa<mach_o::DwarfDebugInfo>(atomFile->debugInfo())) + objFileHasDwarf = true; + else if (isa<mach_o::StabsDebugInfo>(atomFile->debugInfo())) + filesWithStabs.insert(atomFile); + } + } + } + + // If this atom is from a file that needs dwarf, add it to the list. + if (objFileHasDwarf) + atomsNeedingDebugNotes.push_back(info.atom); + } + } + } + + // Sort atoms needing debug notes by file ordinal, then atom ordinal. 
+ std::sort(atomsNeedingDebugNotes.begin(), atomsNeedingDebugNotes.end(), + [](const DefinedAtom *lhs, const DefinedAtom *rhs) { + if (lhs->file().ordinal() != rhs->file().ordinal()) + return (lhs->file().ordinal() < rhs->file().ordinal()); + return (lhs->ordinal() < rhs->ordinal()); + }); + + // FIXME: Handle <rdar://problem/17689030>: Add -add_ast_path option to \ + // linker which add N_AST stab entry to output + // See OutputFile::synthesizeDebugNotes in ObjectFile.cpp in ld64. + + StringRef oldFileName = ""; + StringRef oldDirPath = ""; + bool wroteStartSO = false; + std::unordered_set<std::string> seenFiles; + for (const DefinedAtom *atom : atomsNeedingDebugNotes) { + const auto &atomFile = cast<mach_o::MachOFile>(atom->file()); + assert(dyn_cast_or_null<lld::mach_o::DwarfDebugInfo>(atomFile.debugInfo()) + && "file for atom needing debug notes does not contain dwarf"); + auto &dwarf = cast<lld::mach_o::DwarfDebugInfo>(*atomFile.debugInfo()); + + auto &tu = dwarf.translationUnitSource(); + StringRef newFileName = tu.name; + StringRef newDirPath = tu.path; + + // Add an SO whenever the TU source file changes. + if (newFileName != oldFileName || newDirPath != oldDirPath) { + // Translation unit change, emit ending SO + if (oldFileName != "") + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); + + oldFileName = newFileName; + oldDirPath = newDirPath; + + // If newDirPath doesn't end with a '/' we need to add one: + if (newDirPath.back() != '/') { + char *p = + file.ownedAllocations.Allocate<char>(newDirPath.size() + 2); + memcpy(p, newDirPath.data(), newDirPath.size()); + p[newDirPath.size()] = '/'; + p[newDirPath.size() + 1] = '\0'; + newDirPath = p; + } + + // New translation unit, emit start SOs: + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newDirPath)); + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newFileName)); + + // Synthesize OSO for start of file. 
+ char *fullPath = nullptr; + { + SmallString<1024> pathBuf(atomFile.path()); + if (auto EC = llvm::sys::fs::make_absolute(pathBuf)) + return llvm::errorCodeToError(EC); + fullPath = file.ownedAllocations.Allocate<char>(pathBuf.size() + 1); + memcpy(fullPath, pathBuf.c_str(), pathBuf.size() + 1); + } + + // Get mod time. + uint32_t modTime = 0; + llvm::sys::fs::file_status stat; + if (!llvm::sys::fs::status(fullPath, stat)) + if (llvm::sys::fs::exists(stat)) + modTime = llvm::sys::toTimeT(stat.getLastModificationTime()); + + _stabs.push_back(mach_o::Stab(nullptr, N_OSO, _ctx.getCPUSubType(), 1, + modTime, fullPath)); + // <rdar://problem/6337329> linker should put cpusubtype in n_sect field + // of nlist entry for N_OSO debug note entries. + wroteStartSO = true; + } + + if (atom->contentType() == DefinedAtom::typeCode) { + // Synthesize BNSYM and start FUN stabs. + _stabs.push_back(mach_o::Stab(atom, N_BNSYM, 1, 0, 0, "")); + _stabs.push_back(mach_o::Stab(atom, N_FUN, 1, 0, 0, atom->name())); + // Synthesize any SOL stabs needed + // FIXME: add SOL stabs. + _stabs.push_back(mach_o::Stab(nullptr, N_FUN, 0, 0, + atom->rawContent().size(), "")); + _stabs.push_back(mach_o::Stab(nullptr, N_ENSYM, 1, 0, + atom->rawContent().size(), "")); + } else { + if (atom->scope() == Atom::scopeTranslationUnit) + _stabs.push_back(mach_o::Stab(atom, N_STSYM, 1, 0, 0, atom->name())); + else + _stabs.push_back(mach_o::Stab(nullptr, N_GSYM, 1, 0, 0, atom->name())); + } + } + + // Emit ending SO if necessary. + if (wroteStartSO) + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); + + // Copy any stabs from .o file. + for (const auto *objFile : filesWithStabs) { + const auto &stabsList = + cast<mach_o::StabsDebugInfo>(objFile->debugInfo())->stabs(); + for (auto &stab : stabsList) { + // FIXME: Drop stabs whose atoms have been dead-stripped. 
+ _stabs.push_back(stab); + } + } + + return llvm::Error::success(); +} + +uint16_t Util::descBits(const DefinedAtom* atom) { + uint16_t desc = 0; + switch (atom->merge()) { + case lld::DefinedAtom::mergeNo: + case lld::DefinedAtom::mergeAsTentative: + break; + case lld::DefinedAtom::mergeAsWeak: + case lld::DefinedAtom::mergeAsWeakAndAddressUsed: + desc |= N_WEAK_DEF; + break; + case lld::DefinedAtom::mergeSameNameAndSize: + case lld::DefinedAtom::mergeByLargestSection: + case lld::DefinedAtom::mergeByContent: + llvm_unreachable("Unsupported DefinedAtom::merge()"); + break; + } + if (atom->contentType() == lld::DefinedAtom::typeResolver) + desc |= N_SYMBOL_RESOLVER; + if (atom->contentType() == lld::DefinedAtom::typeMachHeader) + desc |= REFERENCED_DYNAMICALLY; + if (_archHandler.isThumbFunction(*atom)) + desc |= N_ARM_THUMB_DEF; + if (atom->deadStrip() == DefinedAtom::deadStripNever && + _ctx.outputMachOType() == llvm::MachO::MH_OBJECT) { + if ((atom->contentType() != DefinedAtom::typeInitializerPtr) + && (atom->contentType() != DefinedAtom::typeTerminatorPtr)) + desc |= N_NO_DEAD_STRIP; + } + return desc; +} + +bool Util::AtomSorter::operator()(const AtomAndIndex &left, + const AtomAndIndex &right) { + return (left.atom->name().compare(right.atom->name()) < 0); +} + +llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom, + bool &inGlobalsRegion, + SymbolScope &scope) { + bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + switch (atom->scope()) { + case Atom::scopeTranslationUnit: + scope = 0; + inGlobalsRegion = false; + return llvm::Error::success(); + case Atom::scopeLinkageUnit: + if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) && + _ctx.exportSymbolNamed(atom->name())) { + return llvm::make_error<GenericError>( + Twine("cannot export hidden symbol ") + atom->name()); + } + if (rMode) { + if (_ctx.keepPrivateExterns()) { + // -keep_private_externs means keep in globals region as N_PEXT. 
+ scope = N_PEXT | N_EXT; + inGlobalsRegion = true; + return llvm::Error::success(); + } + } + // scopeLinkageUnit symbols are no longer global once linked. + scope = N_PEXT; + inGlobalsRegion = false; + return llvm::Error::success(); + case Atom::scopeGlobal: + if (_ctx.exportRestrictMode()) { + if (_ctx.exportSymbolNamed(atom->name())) { + scope = N_EXT; + inGlobalsRegion = true; + return llvm::Error::success(); + } else { + scope = N_PEXT; + inGlobalsRegion = false; + return llvm::Error::success(); + } + } else { + scope = N_EXT; + inGlobalsRegion = true; + return llvm::Error::success(); + } + break; + } + llvm_unreachable("atom->scope() unknown enum value"); +} + + + +llvm::Error Util::addSymbols(const lld::File &atomFile, + NormalizedFile &file) { + bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + // Mach-O symbol table has four regions: stabs, locals, globals, undefs. + + // Add all stabs. + for (auto &stab : _stabs) { + Symbol sym; + sym.type = static_cast<NListType>(stab.type); + sym.scope = 0; + sym.sect = stab.other; + sym.desc = stab.desc; + if (stab.atom) + sym.value = _atomToAddress[stab.atom]; + else + sym.value = stab.value; + sym.name = stab.str; + file.stabsSymbols.push_back(sym); + } + + // Add all local (non-global) symbols in address order + std::vector<AtomAndIndex> globals; + globals.reserve(512); + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + if (!atom->name().empty()) { + SymbolScope symbolScope; + bool inGlobalsRegion; + if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){ + return ec; + } + if (inGlobalsRegion) { + AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope }; + globals.push_back(ai); + } else { + Symbol sym; + sym.name = atom->name(); + sym.type = N_SECT; + sym.scope = symbolScope; + sym.sect = sect->finalSectionIndex; + sym.desc = descBits(atom); + sym.value = _atomToAddress[atom]; + 
_atomToSymbolIndex[atom] = file.localSymbols.size(); + file.localSymbols.push_back(sym); + } + } else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){ + // Create 'Lxxx' labels for anonymous atoms if archHandler says so. + static unsigned tempNum = 1; + char tmpName[16]; + sprintf(tmpName, "L%04u", tempNum++); + StringRef tempRef(tmpName); + Symbol sym; + sym.name = tempRef.copy(file.ownedAllocations); + sym.type = N_SECT; + sym.scope = 0; + sym.sect = sect->finalSectionIndex; + sym.desc = 0; + sym.value = _atomToAddress[atom]; + _atomToSymbolIndex[atom] = file.localSymbols.size(); + file.localSymbols.push_back(sym); + } + } + } + + // Sort global symbol alphabetically, then add to symbol table. + std::sort(globals.begin(), globals.end(), AtomSorter()); + const uint32_t globalStartIndex = file.localSymbols.size(); + for (AtomAndIndex &ai : globals) { + Symbol sym; + sym.name = ai.atom->name(); + sym.type = N_SECT; + sym.scope = ai.scope; + sym.sect = ai.index; + sym.desc = descBits(static_cast<const DefinedAtom*>(ai.atom)); + sym.value = _atomToAddress[ai.atom]; + _atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size(); + file.globalSymbols.push_back(sym); + } + + // Sort undefined symbol alphabetically, then add to symbol table. 
+ std::vector<AtomAndIndex> undefs; + undefs.reserve(128); + for (const UndefinedAtom *atom : atomFile.undefined()) { + AtomAndIndex ai = { atom, 0, N_EXT }; + undefs.push_back(ai); + } + for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) { + AtomAndIndex ai = { atom, 0, N_EXT }; + undefs.push_back(ai); + } + std::sort(undefs.begin(), undefs.end(), AtomSorter()); + const uint32_t start = file.globalSymbols.size() + file.localSymbols.size(); + for (AtomAndIndex &ai : undefs) { + Symbol sym; + uint16_t desc = 0; + if (!rMode) { + uint8_t ordinal = 0; + if (!_ctx.useFlatNamespace()) + ordinal = dylibOrdinal(dyn_cast<SharedLibraryAtom>(ai.atom)); + llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal); + } + sym.name = ai.atom->name(); + sym.type = N_UNDF; + sym.scope = ai.scope; + sym.sect = 0; + sym.desc = desc; + sym.value = 0; + _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start; + file.undefinedSymbols.push_back(sym); + } + + return llvm::Error::success(); +} + +const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) { + for (const Reference *ref : *lpAtom) { + if (_archHandler.isLazyPointer(*ref)) { + return ref->target(); + } + } + return nullptr; +} + +const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) { + for (const Reference *ref : *stubAtom) { + if (const Atom *ta = ref->target()) { + if (const DefinedAtom *lpAtom = dyn_cast<DefinedAtom>(ta)) { + const Atom *target = targetOfLazyPointer(lpAtom); + if (target) + return target; + } + } + } + return nullptr; +} + +void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) { + for (SectionInfo *si : _sectionInfos) { + Section &normSect = file.sections[si->normalizedSectionIndex]; + switch (si->type) { + case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS: + for (const AtomInfo &info : si->atomsAndOffsets) { + bool foundTarget = false; + for (const Reference *ref : *info.atom) { + const Atom *target = ref->target(); + if (target) { + if (isa<const 
SharedLibraryAtom>(target)) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + foundTarget = true; + } else { + normSect.indirectSymbols.push_back( + llvm::MachO::INDIRECT_SYMBOL_LOCAL); + } + } + } + if (!foundTarget) { + normSect.indirectSymbols.push_back( + llvm::MachO::INDIRECT_SYMBOL_ABS); + } + } + break; + case llvm::MachO::S_LAZY_SYMBOL_POINTERS: + for (const AtomInfo &info : si->atomsAndOffsets) { + const Atom *target = targetOfLazyPointer(info.atom); + if (target) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + } + } + break; + case llvm::MachO::S_SYMBOL_STUBS: + for (const AtomInfo &info : si->atomsAndOffsets) { + const Atom *target = targetOfStub(info.atom); + if (target) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + } + } + break; + default: + break; + } + } +} + +void Util::addDependentDylibs(const lld::File &atomFile, + NormalizedFile &nFile) { + // Scan all imported symbols and build up list of dylibs they are from. + int ordinal = 1; + for (const auto *dylib : _ctx.allDylibs()) { + DylibPathToInfo::iterator pos = _dylibInfo.find(dylib->installName()); + if (pos == _dylibInfo.end()) { + DylibInfo info; + bool flatNamespaceAtom = dylib == _ctx.flatNamespaceFile(); + + // If we're in -flat_namespace mode (or this atom came from the flat + // namespace file under -undefined dynamic_lookup) then use the flat + // lookup ordinal. + if (flatNamespaceAtom || _ctx.useFlatNamespace()) + info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + else + info.ordinal = ordinal++; + info.hasWeak = false; + info.hasNonWeak = !info.hasWeak; + _dylibInfo[dylib->installName()] = info; + + // Unless this was a flat_namespace atom, record the source dylib. 
+ if (!flatNamespaceAtom) { + DependentDylib depInfo; + depInfo.path = dylib->installName(); + depInfo.kind = llvm::MachO::LC_LOAD_DYLIB; + depInfo.currentVersion = _ctx.dylibCurrentVersion(dylib->path()); + depInfo.compatVersion = _ctx.dylibCompatVersion(dylib->path()); + nFile.dependentDylibs.push_back(depInfo); + } + } else { + pos->second.hasWeak = false; + pos->second.hasNonWeak = !pos->second.hasWeak; + } + } + // Automatically weak link dylib in which all symbols are weak (canBeNull). + for (DependentDylib &dep : nFile.dependentDylibs) { + DylibInfo &info = _dylibInfo[dep.path]; + if (info.hasWeak && !info.hasNonWeak) + dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB; + else if (_ctx.isUpwardDylib(dep.path)) + dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB; + } +} + +int Util::dylibOrdinal(const SharedLibraryAtom *sa) { + return _dylibInfo[sa->loadName()].ordinal; +} + +void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex, + uint64_t &segmentStartAddr) { + segmentIndex = 0; + for (const SegmentInfo *seg : _segmentInfos) { + if ((seg->address <= sect->address) + && (seg->address+seg->size >= sect->address+sect->size)) { + segmentStartAddr = seg->address; + return; + } + ++segmentIndex; + } + llvm_unreachable("section not in any segment"); +} + +uint32_t Util::sectionIndexForAtom(const Atom *atom) { + uint64_t address = _atomToAddress[atom]; + for (const SectionInfo *si : _sectionInfos) { + if ((si->address <= address) && (address < si->address+si->size)) + return si->finalSectionIndex; + } + llvm_unreachable("atom not in any section"); +} + +void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) { + if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + return; + + // Utility function for ArchHandler to find symbol index for an atom. 
+ auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t { + auto pos = _atomToSymbolIndex.find(&atom); + assert(pos != _atomToSymbolIndex.end()); + return pos->second; + }; + + // Utility function for ArchHandler to find section index for an atom. + auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t { + return sectionIndexForAtom(&atom); + }; + + // Utility function for ArchHandler to find address of atom in output file. + auto addressForAtom = [&] (const Atom &atom) -> uint64_t { + auto pos = _atomToAddress.find(&atom); + assert(pos != _atomToAddress.end()); + return pos->second; + }; + + for (SectionInfo *si : _sectionInfos) { + Section &normSect = file.sections[si->normalizedSectionIndex]; + for (const AtomInfo &info : si->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + for (const Reference *ref : *atom) { + // Skip emitting relocs for sections which are always able to be + // implicitly regenerated and where the relocation targets an address + // which is defined. + if (si->relocsToDefinedCanBeImplicit && isa<DefinedAtom>(ref->target())) + continue; + _archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref, + symIndexForAtom, + sectIndexForAtom, + addressForAtom, + normSect.relocations); + } + } + } +} + +void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) { + if (!_ctx.generateFunctionStartsLoadCommand()) + return; + file.functionStarts.reserve(8192); + // Delta compress function starts, starting with the mach header symbol. + const uint64_t badAddress = ~0ULL; + uint64_t addr = badAddress; + for (SectionInfo *si : _sectionInfos) { + for (const AtomInfo &info : si->atomsAndOffsets) { + auto type = info.atom->contentType(); + if (type == DefinedAtom::typeMachHeader) { + addr = _atomToAddress[info.atom]; + continue; + } + if (type != DefinedAtom::typeCode) + continue; + assert(addr != badAddress && "Missing mach header symbol"); + // Skip atoms which have 0 size. 
This is so that LC_FUNCTION_STARTS + // can't spill in to the next section. + if (!info.atom->size()) + continue; + uint64_t nextAddr = _atomToAddress[info.atom]; + if (_archHandler.isThumbFunction(*info.atom)) + nextAddr |= 1; + uint64_t delta = nextAddr - addr; + if (delta) { + ByteBuffer buffer; + buffer.append_uleb128(delta); + file.functionStarts.insert(file.functionStarts.end(), buffer.bytes(), + buffer.bytes() + buffer.size()); + } + addr = nextAddr; + } + } + + // Null terminate, and pad to pointer size for this arch. + file.functionStarts.push_back(0); + + auto size = file.functionStarts.size(); + for (unsigned i = size, e = llvm::alignTo(size, _ctx.is64Bit() ? 8 : 4); + i != e; ++i) + file.functionStarts.push_back(0); +} + +void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) { + if (!_ctx.generateDataInCodeLoadCommand()) + return; + for (SectionInfo *si : _sectionInfos) { + for (const AtomInfo &info : si->atomsAndOffsets) { + // Atoms that contain data-in-code have "transition" references + // which mark a point where the embedded data starts of ends. + // This needs to be converted to the mach-o format which is an array + // of data-in-code ranges. + uint32_t startOffset = 0; + DataRegionType mode = DataRegionType(0); + for (const Reference *ref : *info.atom) { + if (ref->kindNamespace() != Reference::KindNamespace::mach_o) + continue; + if (_archHandler.isDataInCodeTransition(ref->kindValue())) { + DataRegionType nextMode = (DataRegionType)ref->addend(); + if (mode != nextMode) { + if (mode != 0) { + // Found end data range, so make range entry. + DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = ref->offsetInAtom() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + mode = nextMode; + startOffset = ref->offsetInAtom(); + } + } + if (mode != 0) { + // Function ends with data (no end transition). 
+ DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = info.atom->size() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + } +} + +void Util::addRebaseAndBindingInfo(const lld::File &atomFile, + NormalizedFile &nFile) { + if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) + return; + + uint8_t segmentIndex; + uint64_t segmentStartAddr; + for (SectionInfo *sect : _sectionInfos) { + segIndexForSection(sect, segmentIndex, segmentStartAddr); + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + for (const Reference *ref : *atom) { + uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom() + - segmentStartAddr; + const Atom* targ = ref->target(); + if (_archHandler.isPointer(*ref)) { + // A pointer to a DefinedAtom requires rebasing. + if (isa<DefinedAtom>(targ)) { + RebaseLocation rebase; + rebase.segIndex = segmentIndex; + rebase.segOffset = segmentOffset; + rebase.kind = llvm::MachO::REBASE_TYPE_POINTER; + nFile.rebasingInfo.push_back(rebase); + } + // A pointer to an SharedLibraryAtom requires binding. 
+ if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) { + BindLocation bind; + bind.segIndex = segmentIndex; + bind.segOffset = segmentOffset; + bind.kind = llvm::MachO::BIND_TYPE_POINTER; + bind.canBeNull = sa->canBeNullAtRuntime(); + bind.ordinal = dylibOrdinal(sa); + bind.symbolName = targ->name(); + bind.addend = ref->addend(); + nFile.bindingInfo.push_back(bind); + } + } + else if (_archHandler.isLazyPointer(*ref)) { + BindLocation bind; + if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) { + bind.ordinal = dylibOrdinal(sa); + } else { + bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF; + } + bind.segIndex = segmentIndex; + bind.segOffset = segmentOffset; + bind.kind = llvm::MachO::BIND_TYPE_POINTER; + bind.canBeNull = false; //sa->canBeNullAtRuntime(); + bind.symbolName = targ->name(); + bind.addend = ref->addend(); + nFile.lazyBindingInfo.push_back(bind); + } + } + } + } +} + +void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) { + if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) + return; + + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + if (atom->scope() != Atom::scopeGlobal) + continue; + if (_ctx.exportRestrictMode()) { + if (!_ctx.exportSymbolNamed(atom->name())) + continue; + } + Export exprt; + exprt.name = atom->name(); + exprt.offset = _atomToAddress[atom] - _ctx.baseAddress(); + exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR; + if (atom->merge() == DefinedAtom::mergeAsWeak) + exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + else + exprt.flags = 0; + exprt.otherOffset = 0; + exprt.otherName = StringRef(); + nFile.exportInfo.push_back(exprt); + } + } +} + +uint32_t Util::fileFlags() { + // FIXME: these need to determined at runtime. + if (_ctx.outputMachOType() == MH_OBJECT) { + return _subsectionsViaSymbols ? 
MH_SUBSECTIONS_VIA_SYMBOLS : 0; + } else { + uint32_t flags = MH_DYLDLINK; + if (!_ctx.useFlatNamespace()) + flags |= MH_TWOLEVEL | MH_NOUNDEFS; + if ((_ctx.outputMachOType() == MH_EXECUTE) && _ctx.PIE()) + flags |= MH_PIE; + if (_hasTLVDescriptors) + flags |= (MH_PIE | MH_HAS_TLV_DESCRIPTORS); + return flags; + } +} + +} // end anonymous namespace + +namespace lld { +namespace mach_o { +namespace normalized { + +/// Convert a set of Atoms into a normalized mach-o file. +llvm::Expected<std::unique_ptr<NormalizedFile>> +normalizedFromAtoms(const lld::File &atomFile, + const MachOLinkingContext &context) { + // The util object buffers info until the normalized file can be made. + Util util(context); + util.processDefinedAtoms(atomFile); + util.organizeSections(); + + std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + NormalizedFile &normFile = *f.get(); + normFile.arch = context.arch(); + normFile.fileType = context.outputMachOType(); + normFile.flags = util.fileFlags(); + normFile.stackSize = context.stackSize(); + normFile.installName = context.installName(); + normFile.currentVersion = context.currentVersion(); + normFile.compatVersion = context.compatibilityVersion(); + normFile.os = context.os(); + + // If we are emitting an object file, then the min version is the maximum + // of the min's of all the source files and the cmdline. 
+ if (normFile.fileType == llvm::MachO::MH_OBJECT) + normFile.minOSverson = std::max(context.osMinVersion(), util.minVersion()); + else + normFile.minOSverson = context.osMinVersion(); + + normFile.minOSVersionKind = util.minVersionCommandType(); + + normFile.sdkVersion = context.sdkVersion(); + normFile.sourceVersion = context.sourceVersion(); + + if (context.generateVersionLoadCommand() && + context.os() != MachOLinkingContext::OS::unknown) + normFile.hasMinVersionLoadCommand = true; + else if (normFile.fileType == llvm::MachO::MH_OBJECT && + util.allSourceFilesHaveMinVersions() && + ((normFile.os != MachOLinkingContext::OS::unknown) || + util.minVersionCommandType())) { + // If we emit an object file, then it should contain a min version load + // command if all of the source files also contained min version commands. + // Also, we either need to have a platform, or found a platform from the + // source object files. + normFile.hasMinVersionLoadCommand = true; + } + normFile.generateDataInCodeLoadCommand = + context.generateDataInCodeLoadCommand(); + normFile.pageSize = context.pageSize(); + normFile.rpaths = context.rpaths(); + util.addDependentDylibs(atomFile, normFile); + util.copySegmentInfo(normFile); + util.copySectionInfo(normFile); + util.assignAddressesToSections(normFile); + util.buildAtomToAddressMap(); + if (auto err = util.synthesizeDebugNotes(normFile)) + return std::move(err); + util.updateSectionInfo(normFile); + util.copySectionContent(normFile); + if (auto ec = util.addSymbols(atomFile, normFile)) { + return std::move(ec); + } + util.addIndirectSymbols(atomFile, normFile); + util.addRebaseAndBindingInfo(atomFile, normFile); + util.addExportInfo(atomFile, normFile); + util.addSectionRelocs(atomFile, normFile); + util.addFunctionStarts(atomFile, normFile); + util.buildDataInCodeArray(atomFile, normFile); + util.copyEntryPointAddress(normFile); + + return std::move(f); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff 
--git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp new file mode 100644 index 000000000000..4b17f7b3a85f --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp @@ -0,0 +1,1635 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory normalized mach-o to in-memory Atoms. +/// +/// +------------+ +/// | normalized | +/// +------------+ +/// | +/// | +/// v +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "MachONormalizedFile.h" +#include "ArchHandler.h" +#include "Atoms.h" +#include "File.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +#define DEBUG_TYPE "normalized-file-to-atoms" + +namespace lld { +namespace mach_o { + + +namespace { // anonymous + + +// Builds one row of sectsToAtomType; the atomType argument is qualified with +// DefinedAtom:: by the macro. +#define ENTRY(seg, sect, type, atomType) \ + {seg, sect, type, DefinedAtom::atomType } + +/// One row of the table that maps a mach-o segment name, section name and +/// section type to the content type of the atoms in that section. +struct MachORelocatableSectionToAtomType { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + DefinedAtom::ContentType atomType; +}; + +/// Table scanned in order by atomTypeFromSection(): the first row whose +/// section type matches and whose names match (an empty name is a wildcard) +/// wins. The final typeUnknown row terminates the scan. +const MachORelocatableSectionToAtomType sectsToAtomType[] = { + ENTRY("__TEXT", "__text", S_REGULAR, typeCode), + ENTRY("__TEXT", "__text", S_REGULAR, 
typeResolver), + ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), + ENTRY("", "", S_CSTRING_LITERALS, typeCString), + ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), + ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), + ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), + ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), + ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), + ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), + ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), + ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), + ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), + ENTRY("__DATA", "__data", S_REGULAR, typeData), + ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), + ENTRY("__DATA", "__const", S_REGULAR, typeConstData), + ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), + ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, + typeInitializerPtr), + ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, + typeTerminatorPtr), + ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), + ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), + ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, + typeThunkTLV), + ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), + ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, + typeTLVInitialZeroFill), + ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), + ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), + ENTRY("", "", S_INTERPOSING, typeInterposingTuples), + ENTRY("__LD", "__compact_unwind", S_REGULAR, + typeCompactUnwindInfo), + ENTRY("", "", S_REGULAR, typeUnknown) +}; +#undef ENTRY + + +/// Figures out ContentType of a mach-o section. 
+DefinedAtom::ContentType atomTypeFromSection(const Section §ion, + bool &customSectionName) { + // First look for match of name and type. Empty names in table are wildcards. + customSectionName = false; + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->sectionType != section.type) + continue; + if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) + continue; + if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) + continue; + customSectionName = p->segmentName.empty() && p->sectionName.empty(); + return p->atomType; + } + // Look for code denoted by section attributes + if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) + return DefinedAtom::typeCode; + + return DefinedAtom::typeUnknown; +} + +enum AtomizeModel { + atomizeAtSymbols, + atomizeFixedSize, + atomizePointerSize, + atomizeUTF8, + atomizeUTF16, + atomizeCFI, + atomizeCU, + atomizeCFString +}; + +/// Returns info on how to atomize a section of the specified ContentType. 
+void sectionParseInfo(DefinedAtom::ContentType atomType, + unsigned int &sizeMultiple, + DefinedAtom::Scope &scope, + DefinedAtom::Merge &merge, + AtomizeModel &atomizeModel) { + struct ParseInfo { + DefinedAtom::ContentType atomType; + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + }; + + #define ENTRY(type, size, scope, merge, model) \ + {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } + + static const ParseInfo parseInfo[] = { + ENTRY(typeCode, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstant, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF8), + ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF16), + ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, + atomizeCFI), + ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, + atomizeCFString), + ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, + atomizeCU), + ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, + atomizePointerSize), + ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, + atomizePointerSize), + ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, + atomizeAtSymbols) + }; + #undef ENTRY + const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); + for (int i=0; i < tableLen; 
++i) { + if (parseInfo[i].atomType == atomType) { + sizeMultiple = parseInfo[i].sizeMultiple; + scope = parseInfo[i].scope; + merge = parseInfo[i].merge; + atomizeModel = parseInfo[i].atomizeModel; + return; + } + } + + // Unknown type is atomized by symbols. + sizeMultiple = 1; + scope = DefinedAtom::scopeGlobal; + merge = DefinedAtom::mergeNo; + atomizeModel = atomizeAtSymbols; +} + + +Atom::Scope atomScope(uint8_t scope) { + switch (scope) { + case N_EXT: + return Atom::scopeGlobal; + case N_PEXT: + case N_PEXT | N_EXT: + return Atom::scopeLinkageUnit; + case 0: + return Atom::scopeTranslationUnit; + } + llvm_unreachable("unknown scope value!"); +} + +void appendSymbolsInSection(const std::vector<Symbol> &inSymbols, + uint32_t sectionIndex, + SmallVector<const Symbol *, 64> &outSyms) { + for (const Symbol &sym : inSymbols) { + // Only look at definition symbols. + if ((sym.type & N_TYPE) != N_SECT) + continue; + if (sym.sect != sectionIndex) + continue; + outSyms.push_back(&sym); + } +} + +void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, + MachOFile &file, uint64_t symbolAddr, StringRef symbolName, + uint16_t symbolDescFlags, Atom::Scope symbolScope, + uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { + // Mach-O symbol table does have size in it. Instead the size is the + // difference between this and the next symbol. + uint64_t size = nextSymbolAddr - symbolAddr; + uint64_t offset = symbolAddr - section.address; + bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; + if (isZeroFillSection(section.type)) { + file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, + noDeadStrip, copyRefs, §ion); + } else { + DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) + ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; + bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); + if (atomType == DefinedAtom::typeUnknown) { + // Mach-O needs a segment and section name. 
Concatentate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. + std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, + merge, thumb, noDeadStrip, offset, + size, segSectName, true, §ion); + } else { + if ((atomType == lld::DefinedAtom::typeCode) && + (symbolDescFlags & N_SYMBOL_RESOLVER)) { + atomType = lld::DefinedAtom::typeResolver; + } + file.addDefinedAtom(symbolName, symbolScope, atomType, merge, + offset, size, thumb, noDeadStrip, copyRefs, §ion); + } + } +} + +llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, + const Section §ion, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + // Find section's index. + uint32_t sectIndex = 1; + for (auto § : normalizedFile.sections) { + if (§ == §ion) + break; + ++sectIndex; + } + + // Find all symbols in this section. + SmallVector<const Symbol *, 64> symbols; + appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); + appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); + + // Sort symbols. + std::sort(symbols.begin(), symbols.end(), + [](const Symbol *lhs, const Symbol *rhs) -> bool { + if (lhs == rhs) + return false; + // First by address. + uint64_t lhsAddr = lhs->value; + uint64_t rhsAddr = rhs->value; + if (lhsAddr != rhsAddr) + return lhsAddr < rhsAddr; + // If same address, one is an alias so sort by scope. + Atom::Scope lScope = atomScope(lhs->scope); + Atom::Scope rScope = atomScope(rhs->scope); + if (lScope != rScope) + return lScope < rScope; + // If same address and scope, see if one might be better as + // the alias. + bool lPrivate = (lhs->name.front() == 'l'); + bool rPrivate = (rhs->name.front() == 'l'); + if (lPrivate != rPrivate) + return lPrivate; + // If same address and scope, sort by name. 
+ return lhs->name < rhs->name; + }); + + // Debug logging of symbols. + //for (const Symbol *sym : symbols) + // llvm::errs() << " sym: " + // << llvm::format("0x%08llx ", (uint64_t)sym->value) + // << ", " << sym->name << "\n"; + + // If section has no symbols and no content, there are no atoms. + if (symbols.empty() && section.content.empty()) + return llvm::Error::success(); + + if (symbols.empty()) { + // Section has no symbols, put all content in one anoymous atom. + atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, + section.address + section.content.size(), + scatterable, copyRefs); + } + else if (symbols.front()->value != section.address) { + // Section has anonymous content before first symbol. + atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, symbols.front()->value, + scatterable, copyRefs); + } + + const Symbol *lastSym = nullptr; + for (const Symbol *sym : symbols) { + if (lastSym != nullptr) { + // Ignore any assembler added "ltmpNNN" symbol at start of section + // if there is another symbol at the start. + if ((lastSym->value != sym->value) + || lastSym->value != section.address + || !lastSym->name.startswith("ltmp")) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), sym->value, + scatterable, copyRefs); + } + } + lastSym = sym; + } + if (lastSym != nullptr) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), + section.address + section.content.size(), + scatterable, copyRefs); + } + + // If object built without .subsections_via_symbols, add reference chain. 
+ if (!scatterable) { + MachODefinedAtom *prevAtom = nullptr; + file.eachAtomInSection(section, + [&](MachODefinedAtom *atom, uint64_t offset)->void { + if (prevAtom) + prevAtom->addReference(Reference::KindNamespace::all, + Reference::KindArch::all, + Reference::kindLayoutAfter, 0, atom, 0); + prevAtom = atom; + }); + } + + return llvm::Error::success(); +} + +llvm::Error processSection(DefinedAtom::ContentType atomType, + const Section §ion, + bool customSectionName, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + + // Get info on how to atomize section. + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); + + // Validate section size. + if ((section.content.size() % sizeMultiple) != 0) + return llvm::make_error<GenericError>(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " has size (" + + Twine(section.content.size()) + + ") which is not a multiple of " + + Twine(sizeMultiple)); + + if (atomizeModel == atomizeAtSymbols) { + // Break section up into atoms each with a fixed size. + return processSymboledSection(atomType, section, normalizedFile, file, + scatterable, copyRefs); + } else { + unsigned int size; + for (unsigned int offset = 0, e = section.content.size(); offset != e;) { + switch (atomizeModel) { + case atomizeFixedSize: + // Break section up into atoms each with a fixed size. + size = sizeMultiple; + break; + case atomizePointerSize: + // Break section up into atoms each the size of a pointer. + size = is64 ? 8 : 4; + break; + case atomizeUTF8: + // Break section up into zero terminated c-strings. 
+ size = 0; + for (unsigned int i = offset; i < e; ++i) { + if (section.content[i] == 0) { + size = i + 1 - offset; + break; + } + } + break; + case atomizeUTF16: + // Break section up into zero terminated UTF16 strings. + size = 0; + for (unsigned int i = offset; i < e; i += 2) { + if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { + size = i + 2 - offset; + break; + } + } + break; + case atomizeCFI: + // Break section up into dwarf unwind CFIs (FDE or CIE). + size = read32(§ion.content[offset], isBig) + 4; + if (offset+size > section.content.size()) { + return llvm::make_error<GenericError>(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. Size of CFI " + "starting at offset (" + + Twine(offset) + + ") is past end of section."); + } + break; + case atomizeCU: + // Break section up into compact unwind entries. + size = is64 ? 32 : 20; + break; + case atomizeCFString: + // Break section up into NS/CFString objects. + size = is64 ? 32 : 16; + break; + case atomizeAtSymbols: + break; + } + if (size == 0) { + return llvm::make_error<GenericError>(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. The last atom " + "is not zero terminated."); + } + if (customSectionName) { + // Mach-O needs a segment and section name. Concatentate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. 
+ std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, + merge, false, false, offset, + size, segSectName, true, §ion); + } else { + file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, + false, false, copyRefs, §ion); + } + offset += size; + } + } + return llvm::Error::success(); +} + +const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, + uint64_t address) { + for (const Section &s : normalizedFile.sections) { + uint64_t sAddr = s.address; + if ((sAddr <= address) && (address < sAddr+s.content.size())) { + return &s; + } + } + return nullptr; +} + +const MachODefinedAtom * +findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, + uint64_t addr, Reference::Addend &addend) { + const Section *sect = nullptr; + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return nullptr; + + uint32_t offsetInTarget; + uint64_t offsetInSect = addr - sect->address; + auto atom = + file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + addend = offsetInTarget; + return atom; +} + +// Walks all relocations for a section in a normalized .o file and +// creates corresponding lld::Reference objects. +llvm::Error convertRelocs(const Section §ion, + const NormalizedFile &normalizedFile, + bool scatterable, + MachOFile &file, + ArchHandler &handler) { + // Utility function for ArchHandler to find atom by its address. 
+ auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, + const lld::Atom **atom, Reference::Addend *addend) + -> llvm::Error { + if (sectIndex > normalizedFile.sections.size()) + return llvm::make_error<GenericError>(Twine("out of range section " + "index (") + Twine(sectIndex) + ")"); + const Section *sect = nullptr; + if (sectIndex == 0) { + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return llvm::make_error<GenericError>(Twine("address (" + Twine(addr) + + ") is not in any section")); + } else { + sect = &normalizedFile.sections[sectIndex-1]; + } + uint32_t offsetInTarget; + uint64_t offsetInSect = addr - sect->address; + *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + *addend = offsetInTarget; + return llvm::Error::success(); + }; + + // Utility function for ArchHandler to find atom by its symbol index. + auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) + -> llvm::Error { + // Find symbol from index. + const Symbol *sym = nullptr; + uint32_t numStabs = normalizedFile.stabsSymbols.size(); + uint32_t numLocal = normalizedFile.localSymbols.size(); + uint32_t numGlobal = normalizedFile.globalSymbols.size(); + uint32_t numUndef = normalizedFile.undefinedSymbols.size(); + assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?"); + if (symbolIndex < numStabs+numLocal) { + sym = &normalizedFile.localSymbols[symbolIndex-numStabs]; + } else if (symbolIndex < numStabs+numLocal+numGlobal) { + sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal]; + } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) { + sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal- + numGlobal]; + } else { + return llvm::make_error<GenericError>(Twine("symbol index (") + + Twine(symbolIndex) + ") out of range"); + } + + // Find atom from symbol. 
+ if ((sym->type & N_TYPE) == N_SECT) { + if (sym->sect > normalizedFile.sections.size()) + return llvm::make_error<GenericError>(Twine("symbol section index (") + + Twine(sym->sect) + ") out of range "); + const Section &symSection = normalizedFile.sections[sym->sect-1]; + uint64_t targetOffsetInSect = sym->value - symSection.address; + MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, + targetOffsetInSect); + if (target) { + *result = target; + return llvm::Error::success(); + } + return llvm::make_error<GenericError>("no atom found for defined symbol"); + } else if ((sym->type & N_TYPE) == N_UNDF) { + const lld::Atom *target = file.findUndefAtom(sym->name); + if (target) { + *result = target; + return llvm::Error::success(); + } + return llvm::make_error<GenericError>("no undefined atom found for sym"); + } else { + // Search undefs + return llvm::make_error<GenericError>("no atom found for symbol"); + } + }; + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + // Use old-school iterator so that paired relocations can be grouped. + for (auto it=section.relocations.begin(), e=section.relocations.end(); + it != e; ++it) { + const Relocation &reloc = *it; + // Find atom this relocation is in. + if (reloc.offset > section.content.size()) + return llvm::make_error<GenericError>( + Twine("r_address (") + Twine(reloc.offset) + + ") is larger than section size (" + + Twine(section.content.size()) + ")"); + uint32_t offsetInAtom; + MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, + reloc.offset, + &offsetInAtom); + assert(inAtom && "r_address in range, should have found atom"); + uint64_t fixupAddress = section.address + reloc.offset; + + const lld::Atom *target = nullptr; + Reference::Addend addend = 0; + Reference::KindValue kind; + if (handler.isPairedReloc(reloc)) { + // Handle paired relocations together. 
+ const Relocation &reloc2 = *++it; + auto relocErr = handler.getPairReferenceInfo( + reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, + atomByAddr, atomBySymbol, &kind, &target, &addend); + if (relocErr) { + return handleErrors(std::move(relocErr), + [&](std::unique_ptr<GenericError> GE) { + return llvm::make_error<GenericError>( + Twine("bad relocation (") + GE->getMessage() + + ") in section " + + section.segmentName + "/" + section.sectionName + + " (r1_address=" + Twine::utohexstr(reloc.offset) + + ", r1_type=" + Twine(reloc.type) + + ", r1_extern=" + Twine(reloc.isExtern) + + ", r1_length=" + Twine((int)reloc.length) + + ", r1_pcrel=" + Twine(reloc.pcRel) + + (!reloc.scattered ? (Twine(", r1_symbolnum=") + + Twine(reloc.symbol)) + : (Twine(", r1_scattered=1, r1_value=") + + Twine(reloc.value))) + + ")" + + ", (r2_address=" + Twine::utohexstr(reloc2.offset) + + ", r2_type=" + Twine(reloc2.type) + + ", r2_extern=" + Twine(reloc2.isExtern) + + ", r2_length=" + Twine((int)reloc2.length) + + ", r2_pcrel=" + Twine(reloc2.pcRel) + + (!reloc2.scattered ? (Twine(", r2_symbolnum=") + + Twine(reloc2.symbol)) + : (Twine(", r2_scattered=1, r2_value=") + + Twine(reloc2.value))) + + ")" ); + }); + } + } + else { + // Use ArchHandler to convert relocation record into information + // needed to instantiate an lld::Reference object. 
+ auto relocErr = handler.getReferenceInfo( + reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, + atomBySymbol, &kind, &target, &addend); + if (relocErr) { + return handleErrors(std::move(relocErr), + [&](std::unique_ptr<GenericError> GE) { + return llvm::make_error<GenericError>( + Twine("bad relocation (") + GE->getMessage() + + ") in section " + + section.segmentName + "/" + section.sectionName + + " (r_address=" + Twine::utohexstr(reloc.offset) + + ", r_type=" + Twine(reloc.type) + + ", r_extern=" + Twine(reloc.isExtern) + + ", r_length=" + Twine((int)reloc.length) + + ", r_pcrel=" + Twine(reloc.pcRel) + + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) + : (Twine(", r_scattered=1, r_value=") + + Twine(reloc.value))) + + ")" ); + }); + } + } + // Instantiate an lld::Reference object and add to its atom. + inAtom->addReference(Reference::KindNamespace::mach_o, + handler.kindArch(), + kind, offsetInAtom, target, addend); + } + + return llvm::Error::success(); +} + +bool isDebugInfoSection(const Section §ion) { + if ((section.attributes & S_ATTR_DEBUG) == 0) + return false; + return section.segmentName.equals("__DWARF"); +} + +static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) { + std::string strName = name.str(); + for (auto *atom : file.defined()) + if (atom->name() == strName) + return atom; + return nullptr; +} + +static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) { + char *strCopy = alloc.Allocate<char>(str.size() + 1); + memcpy(strCopy, str.data(), str.size()); + strCopy[str.size()] = '\0'; + return strCopy; +} + +llvm::Error parseStabs(MachOFile &file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + + if (normalizedFile.stabsSymbols.empty()) + return llvm::Error::success(); + + // FIXME: Kill this off when we can move to sane yaml parsing. 
+ std::unique_ptr<BumpPtrAllocator> allocator; + if (copyRefs) + allocator = llvm::make_unique<BumpPtrAllocator>(); + + enum { start, inBeginEnd } state = start; + + const Atom *currentAtom = nullptr; + uint64_t currentAtomAddress = 0; + StabsDebugInfo::StabsList stabsList; + for (const auto &stabSym : normalizedFile.stabsSymbols) { + Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc, + stabSym.value, stabSym.name); + switch (state) { + case start: + switch (static_cast<StabType>(stabSym.type)) { + case N_BNSYM: + state = inBeginEnd; + currentAtomAddress = stabSym.value; + Reference::Addend addend; + currentAtom = findAtomCoveringAddress(normalizedFile, file, + currentAtomAddress, addend); + if (addend != 0) + return llvm::make_error<GenericError>( + "Non-zero addend for BNSYM '" + stabSym.name + "' in " + + file.path()); + if (currentAtom) + stab.atom = currentAtom; + else { + // FIXME: ld64 just issues a warning here - should we match that? + return llvm::make_error<GenericError>( + "can't find atom for stabs BNSYM at " + + Twine::utohexstr(stabSym.value) + " in " + file.path()); + } + break; + case N_SO: + case N_OSO: + // Not associated with an atom, just copy. 
+ if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + break; + case N_GSYM: { + auto colonIdx = stabSym.name.find(':'); + if (colonIdx != StringRef::npos) { + StringRef name = stabSym.name.substr(0, colonIdx); + currentAtom = findDefinedAtomByName(file, "_" + name); + stab.atom = currentAtom; + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + } else { + currentAtom = findDefinedAtomByName(file, stabSym.name); + stab.atom = currentAtom; + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + } + if (stab.atom == nullptr) + return llvm::make_error<GenericError>( + "can't find atom for N_GSYM stabs" + stabSym.name + + " in " + file.path()); + break; + } + case N_FUN: + return llvm::make_error<GenericError>( + "old-style N_FUN stab '" + stabSym.name + "' unsupported"); + default: + return llvm::make_error<GenericError>( + "unrecognized stab symbol '" + stabSym.name + "'"); + } + break; + case inBeginEnd: + stab.atom = currentAtom; + switch (static_cast<StabType>(stabSym.type)) { + case N_ENSYM: + state = start; + currentAtom = nullptr; + break; + case N_FUN: + // Just copy the string. + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + break; + default: + return llvm::make_error<GenericError>( + "unrecognized stab symbol '" + stabSym.name + "'"); + } + } + llvm::dbgs() << "Adding to stabsList: " << stab << "\n"; + stabsList.push_back(stab); + } + + file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList))); + + // FIXME: Kill this off when we fix YAML memory ownership. 
+ file.debugInfo()->setAllocator(std::move(allocator)); + + return llvm::Error::success(); +} + +static llvm::DataExtractor +dataExtractorFromSection(const NormalizedFile &normalizedFile, + const Section &S) { + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + StringRef SecData(reinterpret_cast<const char*>(S.content.data()), + S.content.size()); + return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4); +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. +static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData, + uint64_t abbrCode) { + uint64_t curCode; + uint32_t offset = 0; + while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) { + // Tag + abbrevData.getULEB128(&offset); + // DW_CHILDREN + abbrevData.getU8(&offset); + // Attributes + while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset)) + ; + } + return offset; +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. +static Expected<const char *> +getIndexedString(const NormalizedFile &normalizedFile, + llvm::dwarf::Form form, llvm::DataExtractor infoData, + uint32_t &infoOffset, const Section &stringsSection) { + if (form == llvm::dwarf::DW_FORM_string) + return infoData.getCStr(&infoOffset); + if (form != llvm::dwarf::DW_FORM_strp) + return llvm::make_error<GenericError>( + "string field encoded without DW_FORM_strp"); + uint32_t stringOffset = infoData.getU32(&infoOffset); + llvm::DataExtractor stringsData = + dataExtractorFromSection(normalizedFile, stringsSection); + return stringsData.getCStr(&stringOffset); +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. 
+static llvm::Expected<TranslationUnitSource> +readCompUnit(const NormalizedFile &normalizedFile, + const Section &info, + const Section &abbrev, + const Section &strings, + StringRef path) { + // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE + // inspection" code if possible. + uint32_t offset = 0; + llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32; + auto infoData = dataExtractorFromSection(normalizedFile, info); + uint32_t length = infoData.getU32(&offset); + if (length == 0xffffffff) { + Format = llvm::dwarf::DwarfFormat::DWARF64; + infoData.getU64(&offset); + } + else if (length > 0xffffff00) + return llvm::make_error<GenericError>("Malformed DWARF in " + path); + + uint16_t version = infoData.getU16(&offset); + + if (version < 2 || version > 4) + return llvm::make_error<GenericError>("Unsupported DWARF version in " + + path); + + infoData.getU32(&offset); // Abbrev offset (should be zero) + uint8_t addrSize = infoData.getU8(&offset); + + uint32_t abbrCode = infoData.getULEB128(&offset); + auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev); + uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode); + uint64_t tag = abbrevData.getULEB128(&abbrevOffset); + if (tag != llvm::dwarf::DW_TAG_compile_unit) + return llvm::make_error<GenericError>("top level DIE is not a compile unit"); + // DW_CHILDREN + abbrevData.getU8(&abbrevOffset); + uint32_t name; + llvm::dwarf::Form form; + TranslationUnitSource tu; + while ((name = abbrevData.getULEB128(&abbrevOffset)) | + (form = static_cast<llvm::dwarf::Form>( + abbrevData.getULEB128(&abbrevOffset))) && + (name != 0 || form != 0)) { + switch (name) { + case llvm::dwarf::DW_AT_name: { + if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, + strings)) + tu.name = *eName; + else + return eName.takeError(); + break; + } + case llvm::dwarf::DW_AT_comp_dir: { + if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, + strings)) + 
tu.path = *eName; + else + return eName.takeError(); + break; + } + default: + llvm::DWARFFormValue::skipValue(form, infoData, &offset, version, + addrSize, Format); + } + } + return tu; +} + +llvm::Error parseDebugInfo(MachOFile &file, + const NormalizedFile &normalizedFile, bool copyRefs) { + + // Find the interesting debug info sections. + const Section *debugInfo = nullptr; + const Section *debugAbbrev = nullptr; + const Section *debugStrings = nullptr; + + for (auto &s : normalizedFile.sections) { + if (s.segmentName == "__DWARF") { + if (s.sectionName == "__debug_info") + debugInfo = &s; + else if (s.sectionName == "__debug_abbrev") + debugAbbrev = &s; + else if (s.sectionName == "__debug_str") + debugStrings = &s; + } + } + + if (!debugInfo) + return parseStabs(file, normalizedFile, copyRefs); + + if (debugInfo->content.size() == 0) + return llvm::Error::success(); + + if (debugInfo->content.size() < 12) + return llvm::make_error<GenericError>("Malformed __debug_info section in " + + file.path() + ": too small"); + + if (!debugAbbrev) + return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " + + file.path()); + + if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev, + *debugStrings, file.path())) { + // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML + // memory ownership. 
+ std::unique_ptr<BumpPtrAllocator> allocator; + if (copyRefs) { + allocator = llvm::make_unique<BumpPtrAllocator>(); + tuOrErr->name = copyDebugString(tuOrErr->name, *allocator); + tuOrErr->path = copyDebugString(tuOrErr->path, *allocator); + } + file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr))); + if (copyRefs) + file.debugInfo()->setAllocator(std::move(allocator)); + } else + return tuOrErr.takeError(); + + return llvm::Error::success(); +} + +static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { + if (is64) + return read64(addr, isBig); + + int32_t res = read32(addr, isBig); + return res; +} + +/// --- Augmentation String Processing --- + +struct CIEInfo { + bool _augmentationDataPresent = false; + bool _mayHaveEH = false; + uint32_t _offsetOfLSDA = ~0U; + uint32_t _offsetOfPersonality = ~0U; + uint32_t _offsetOfFDEPointerEncoding = ~0U; + uint32_t _augmentationDataLength = ~0U; +}; + +typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap; + +static llvm::Error processAugmentationString(const uint8_t *augStr, + CIEInfo &cieInfo, + unsigned &len) { + + if (augStr[0] == '\0') { + len = 1; + return llvm::Error::success(); + } + + if (augStr[0] != 'z') + return llvm::make_error<GenericError>("expected 'z' at start of " + "augmentation string"); + + cieInfo._augmentationDataPresent = true; + uint64_t idx = 1; + + uint32_t offsetInAugmentationData = 0; + while (augStr[idx] != '\0') { + if (augStr[idx] == 'L') { + cieInfo._offsetOfLSDA = offsetInAugmentationData; + // This adds a single byte to the augmentation data. + ++offsetInAugmentationData; + ++idx; + continue; + } + if (augStr[idx] == 'P') { + cieInfo._offsetOfPersonality = offsetInAugmentationData; + // This adds a single byte to the augmentation data for the encoding, + // then a number of bytes for the pointer data. + // FIXME: We are assuming 4 is correct here for the pointer size as we + // always currently use delta32ToGOT. 
+ offsetInAugmentationData += 5; + ++idx; + continue; + } + if (augStr[idx] == 'R') { + cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; + // This adds a single byte to the augmentation data. + ++offsetInAugmentationData; + ++idx; + continue; + } + if (augStr[idx] == 'e') { + if (augStr[idx + 1] != 'h') + return llvm::make_error<GenericError>("expected 'eh' in " + "augmentation string"); + cieInfo._mayHaveEH = true; + idx += 2; + continue; + } + ++idx; + } + + cieInfo._augmentationDataLength = offsetInAugmentationData; + + len = idx + 1; + return llvm::Error::success(); +} + +static llvm::Error processCIE(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler, + const Section *ehFrameSection, + MachODefinedAtom *atom, + uint64_t offset, + CIEInfoMap &cieInfos) { + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + const uint8_t *frameData = atom->rawContent().data(); + + CIEInfo cieInfo; + + uint32_t size = read32(frameData, isBig); + uint64_t cieIDField = size == 0xffffffffU + ? sizeof(uint32_t) + sizeof(uint64_t) + : sizeof(uint32_t); + uint64_t versionField = cieIDField + sizeof(uint32_t); + uint64_t augmentationStringField = versionField + sizeof(uint8_t); + + unsigned augmentationStringLength = 0; + if (auto err = processAugmentationString(frameData + augmentationStringField, + cieInfo, augmentationStringLength)) + return err; + + if (cieInfo._offsetOfPersonality != ~0U) { + // If we have augmentation data for the personality function, then we may + // need to implicitly generate its relocation. + + // Parse the EH Data field which is pointer sized. + uint64_t EHDataField = augmentationStringField + augmentationStringLength; + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); + + // Parse Code Align Factor which is a ULEB128. 
+ uint64_t CodeAlignField = EHDataField + EHDataFieldSize; + unsigned lengthFieldSize = 0; + llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); + + // Parse Data Align Factor which is a SLEB128. + uint64_t DataAlignField = CodeAlignField + lengthFieldSize; + llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); + + // Parse Return Address Register which is a byte. + uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; + + // Parse the augmentation length which is a ULEB128. + uint64_t AugmentationLengthField = ReturnAddressField + 1; + uint64_t AugmentationLength = + llvm::decodeULEB128(frameData + AugmentationLengthField, + &lengthFieldSize); + + if (AugmentationLength != cieInfo._augmentationDataLength) + return llvm::make_error<GenericError>("CIE augmentation data length " + "mismatch"); + + // Get the start address of the augmentation data. + uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; + + // Parse the personality function from the augmentation data. + uint64_t PersonalityField = + AugmentationDataField + cieInfo._offsetOfPersonality; + + // Parse the personality encoding. + // FIXME: Verify that this is a 32-bit pcrel offset. + uint64_t PersonalityFunctionField = PersonalityField + 1; + + if (atom->begin() != atom->end()) { + // If we have an explicit relocation, then make sure it matches this + // offset as this is where we'd expect it to be applied to. + DefinedAtom::reference_iterator CurrentRef = atom->begin(); + if (CurrentRef->offsetInAtom() != PersonalityFunctionField) + return llvm::make_error<GenericError>("CIE personality reloc at " + "wrong offset"); + + if (++CurrentRef != atom->end()) + return llvm::make_error<GenericError>("CIE contains too many relocs"); + } else { + // Implicitly generate the personality function reloc. It's assumed to + // be a delta32 offset to a GOT entry. + // FIXME: Parse the encoding and check this. 
+ int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); + uint64_t funcAddress = ehFrameSection->address + offset + + PersonalityFunctionField; + funcAddress += funcDelta; + + const MachODefinedAtom *func = nullptr; + Reference::Addend addend; + func = findAtomCoveringAddress(normalizedFile, file, funcAddress, + addend); + atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), + handler.unwindRefToPersonalityFunctionKind(), + PersonalityFunctionField, func, addend); + } + } else if (atom->begin() != atom->end()) { + // Otherwise, we expect there to be no relocations in this atom as the only + // relocation would have been to the personality function. + return llvm::make_error<GenericError>("unexpected relocation in CIE"); + } + + + cieInfos[atom] = std::move(cieInfo); + + return llvm::Error::success(); +} + +static llvm::Error processFDE(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler, + const Section *ehFrameSection, + MachODefinedAtom *atom, + uint64_t offset, + const CIEInfoMap &cieInfos) { + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + + // Compiler wasn't lazy and actually told us what it meant. + // Unfortunately, the compiler may not have generated references for all of + // [cie, func, lsda] and so we still need to parse the FDE and add references + // for any the compiler didn't generate. + if (atom->begin() != atom->end()) + atom->sortReferences(); + + DefinedAtom::reference_iterator CurrentRef = atom->begin(); + + // This helper returns the reference (if one exists) at the offset we are + // currently processing. It automatically increments the ref iterator if we + // do return a ref, and throws an error if we pass over a ref without + // comsuming it. 
+ auto currentRefGetter = [&CurrentRef, + &atom](uint64_t Offset)->const Reference* { + // If there are no more refs found, then we are done. + if (CurrentRef == atom->end()) + return nullptr; + + const Reference *Ref = *CurrentRef; + + // If we haven't reached the offset for this reference, then return that + // we don't yet have a reference to process. + if (Offset < Ref->offsetInAtom()) + return nullptr; + + // If the offset is equal, then we want to process this ref. + if (Offset == Ref->offsetInAtom()) { + ++CurrentRef; + return Ref; + } + + // The current ref is at an offset which is earlier than the current + // offset, then we failed to consume it when we should have. In this case + // throw an error. + llvm::report_fatal_error("Skipped reference when processing FDE"); + }; + + // Helper to either get the reference at this current location, and verify + // that it is of the expected type, or add a reference of that type. + // Returns the reference target. + auto verifyOrAddReference = [&](uint64_t targetAddress, + Reference::KindValue refKind, + uint64_t refAddress, + bool allowsAddend)->const Atom* { + if (auto *ref = currentRefGetter(refAddress)) { + // The compiler already emitted a relocation for the CIE ref. This should + // have been converted to the correct type of reference in + // get[Pair]ReferenceInfo(). + assert(ref->kindValue() == refKind && + "Incorrect EHFrame reference kind"); + return ref->target(); + } + Reference::Addend addend; + auto *target = findAtomCoveringAddress(normalizedFile, file, + targetAddress, addend); + atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), + refKind, refAddress, target, addend); + + if (!allowsAddend) + assert(!addend && "EHFrame reference cannot have addend"); + return target; + }; + + const uint8_t *startFrameData = atom->rawContent().data(); + const uint8_t *frameData = startFrameData; + + uint32_t size = read32(frameData, isBig); + uint64_t cieFieldInFDE = size == 0xffffffffU + ? 
sizeof(uint32_t) + sizeof(uint64_t) + : sizeof(uint32_t); + + // Linker needs to fixup a reference from the FDE to its parent CIE (a + // 32-bit byte offset backwards in the __eh_frame section). + uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); + uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; + cieAddress -= cieDelta; + + auto *cieRefTarget = verifyOrAddReference(cieAddress, + handler.unwindRefToCIEKind(), + cieFieldInFDE, false); + const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget); + assert(cie && cie->contentType() == DefinedAtom::typeCFI && + "FDE's CIE field does not point at the start of a CIE."); + + const CIEInfo &cieInfo = cieInfos.find(cie)->second; + + // Linker needs to fixup reference from the FDE to the function it's + // describing. FIXME: there are actually different ways to do this, and the + // particular method used is specified in the CIE's augmentation fields + // (hopefully) + uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); + + int64_t functionFromFDE = readSPtr(is64, isBig, + frameData + rangeFieldInFDE); + uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; + rangeStart += functionFromFDE; + + verifyOrAddReference(rangeStart, + handler.unwindRefToFunctionKind(), + rangeFieldInFDE, true); + + // Handle the augmentation data if there is any. + if (cieInfo._augmentationDataPresent) { + // First process the augmentation data length field. + uint64_t augmentationDataLengthFieldInFDE = + rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); + unsigned lengthFieldSize = 0; + uint64_t augmentationDataLength = + llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, + &lengthFieldSize); + + if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { + + // Look at the augmentation data field. 
+ uint64_t augmentationDataFieldInFDE = + augmentationDataLengthFieldInFDE + lengthFieldSize; + + int64_t lsdaFromFDE = readSPtr(is64, isBig, + frameData + augmentationDataFieldInFDE); + uint64_t lsdaStart = + ehFrameSection->address + offset + augmentationDataFieldInFDE + + lsdaFromFDE; + + verifyOrAddReference(lsdaStart, + handler.unwindRefToFunctionKind(), + augmentationDataFieldInFDE, true); + } + } + + return llvm::Error::success(); +} + +llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler) { + + const Section *ehFrameSection = nullptr; + for (auto §ion : normalizedFile.sections) + if (section.segmentName == "__TEXT" && + section.sectionName == "__eh_frame") { + ehFrameSection = §ion; + break; + } + + // No __eh_frame so nothing to do. + if (!ehFrameSection) + return llvm::Error::success(); + + llvm::Error ehFrameErr = llvm::Error::success(); + CIEInfoMap cieInfos; + + file.eachAtomInSection(*ehFrameSection, + [&](MachODefinedAtom *atom, uint64_t offset) -> void { + assert(atom->contentType() == DefinedAtom::typeCFI); + + // Bail out if we've encountered an error. 
+ if (ehFrameErr) + return; + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + if (ArchHandler::isDwarfCIE(isBig, atom)) + ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, + atom, offset, cieInfos); + else + ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, + atom, offset, cieInfos); + }); + + return ehFrameErr; +} + +llvm::Error parseObjCImageInfo(const Section §, + const NormalizedFile &normalizedFile, + MachOFile &file) { + + // struct objc_image_info { + // uint32_t version; // initially 0 + // uint32_t flags; + // }; + + ArrayRef<uint8_t> content = sect.content; + if (content.size() != 8) + return llvm::make_error<GenericError>(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " should be 8 bytes in size"); + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + uint32_t version = read32(content.data(), isBig); + if (version) + return llvm::make_error<GenericError>(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " should have version=0"); + + uint32_t flags = read32(content.data() + 4, isBig); + if (flags & (MachOLinkingContext::objc_supports_gc | + MachOLinkingContext::objc_gc_only)) + return llvm::make_error<GenericError>(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " uses GC. This is not supported"); + + if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) + file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); + else + file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); + + file.setSwiftVersion((flags >> 8) & 0xFF); + + return llvm::Error::success(); +} + +/// Converts normalized mach-o file into an lld::File and lld::Atoms. 
+llvm::Expected<std::unique_ptr<lld::File>> +objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + std::unique_ptr<MachOFile> file(new MachOFile(path)); + if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) + return std::move(ec); + return std::unique_ptr<File>(std::move(file)); +} + +llvm::Expected<std::unique_ptr<lld::File>> +dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + // Instantiate SharedLibraryFile object. + std::unique_ptr<MachODylibFile> file(new MachODylibFile(path)); + if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) + return std::move(ec); + return std::unique_ptr<File>(std::move(file)); +} + +} // anonymous namespace + +namespace normalized { + +static bool isObjCImageInfo(const Section §) { + return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || + (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); +} + +llvm::Error +normalizedObjectToAtoms(MachOFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " + << file->path() << "\n"); + bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); + + // Create atoms from each section. + for (auto § : normalizedFile.sections) { + + // If this is a debug-info section parse it specially. + if (isDebugInfoSection(sect)) + continue; + + // If the file contains an objc_image_info struct, then we should parse the + // ObjC flags and Swift version. + if (isObjCImageInfo(sect)) { + if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) + return ec; + // We then skip adding atoms for this section as we use the ObjCPass to + // re-emit this data after it has been aggregated for all files. 
+ continue; + } + + bool customSectionName; + DefinedAtom::ContentType atomType = atomTypeFromSection(sect, + customSectionName); + if (auto ec = processSection(atomType, sect, customSectionName, + normalizedFile, *file, scatterable, copyRefs)) + return ec; + } + // Create atoms from undefined symbols. + for (auto &sym : normalizedFile.undefinedSymbols) { + // Undefinded symbols with n_value != 0 are actually tentative definitions. + if (sym.value == Hex64(0)) { + file->addUndefinedAtom(sym.name, copyRefs); + } else { + file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, + DefinedAtom::Alignment(1 << (sym.desc >> 8)), + copyRefs); + } + } + + // Convert mach-o relocations to References + std::unique_ptr<mach_o::ArchHandler> handler + = ArchHandler::create(normalizedFile.arch); + for (auto § : normalizedFile.sections) { + if (isDebugInfoSection(sect)) + continue; + if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, + *file, *handler)) + return ec; + } + + // Add additional arch-specific References + file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { + handler->addAdditionalReferences(*atom); + }); + + // Each __eh_frame section needs references to both __text (the function we're + // providing unwind info for) and itself (FDE -> CIE). These aren't + // represented in the relocations on some architectures, so we have to add + // them back in manually there. + if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) + return ec; + + // Process mach-o data-in-code regions array. That information is encoded in + // atoms as References at each transition point. 
+ unsigned nextIndex = 0; + for (const DataInCode &entry : normalizedFile.dataInCode) { + ++nextIndex; + const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); + if (!s) { + return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address (" + + Twine(entry.offset) + + ") is not in any section")); + } + uint64_t offsetInSect = entry.offset - s->address; + uint32_t offsetInAtom; + MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, + &offsetInAtom); + if (offsetInAtom + entry.length > atom->size()) { + return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry " + "(offset=" + + Twine(entry.offset) + + ", length=" + + Twine(entry.length) + + ") crosses atom boundary.")); + } + // Add reference that marks start of data-in-code. + atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), + handler->dataInCodeTransitionStart(*atom), + offsetInAtom, atom, entry.kind); + + // Peek at next entry, if it starts where this one ends, skip ending ref. + if (nextIndex < normalizedFile.dataInCode.size()) { + const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; + if (nextEntry.offset == (entry.offset + entry.length)) + continue; + } + + // If data goes to end of function, skip ending ref. + if ((offsetInAtom + entry.length) == atom->size()) + continue; + + // Add reference that marks end of data-in-code. + atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), + handler->dataInCodeTransitionEnd(*atom), + offsetInAtom+entry.length, atom, 0); + } + + // Cache some attributes on the file for use later. + file->setFlags(normalizedFile.flags); + file->setArch(normalizedFile.arch); + file->setOS(normalizedFile.os); + file->setMinVersion(normalizedFile.minOSverson); + file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); + + // Sort references in each atom to their canonical order. 
+ for (const DefinedAtom* defAtom : file->defined()) { + reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); + } + + if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs)) + return err; + + return llvm::Error::success(); +} + +llvm::Error +normalizedDylibToAtoms(MachODylibFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + file->setInstallName(normalizedFile.installName); + file->setCompatVersion(normalizedFile.compatVersion); + file->setCurrentVersion(normalizedFile.currentVersion); + + // Tell MachODylibFile object about all symbols it exports. + if (!normalizedFile.exportInfo.empty()) { + // If exports trie exists, use it instead of traditional symbol table. + for (const Export &exp : normalizedFile.exportInfo) { + bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + // StringRefs from export iterator are ephemeral, so force copy. + file->addExportedSymbol(exp.name, weakDef, true); + } + } else { + for (auto &sym : normalizedFile.globalSymbols) { + assert((sym.scope & N_EXT) && "only expect external symbols here"); + bool weakDef = (sym.desc & N_WEAK_DEF); + file->addExportedSymbol(sym.name, weakDef, copyRefs); + } + } + // Tell MachODylibFile object about all dylibs it re-exports. + for (const DependentDylib &dep : normalizedFile.dependentDylibs) { + if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) + file->addReExportedDylib(dep.path); + } + return llvm::Error::success(); +} + +void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, + StringRef &segmentName, + StringRef §ionName, + SectionType §ionType, + SectionAttr §ionAttrs, + bool &relocsToDefinedCanBeImplicit) { + + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->atomType != atomType) + continue; + // Wild carded entries are ignored for reverse lookups. 
+ if (p->segmentName.empty() || p->sectionName.empty()) + continue; + segmentName = p->segmentName; + sectionName = p->sectionName; + sectionType = p->sectionType; + sectionAttrs = 0; + relocsToDefinedCanBeImplicit = false; + if (atomType == DefinedAtom::typeCode) + sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; + if (atomType == DefinedAtom::typeCFI) + relocsToDefinedCanBeImplicit = true; + return; + } + llvm_unreachable("content type not yet supported"); +} + +llvm::Expected<std::unique_ptr<lld::File>> +normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + switch (normalizedFile.fileType) { + case MH_DYLIB: + case MH_DYLIB_STUB: + return dylibToAtoms(normalizedFile, path, copyRefs); + case MH_OBJECT: + return objectToAtoms(normalizedFile, path, copyRefs); + default: + llvm_unreachable("unhandled MachO file type!"); + } +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp new file mode 100644 index 000000000000..218170965eca --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp @@ -0,0 +1,845 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation uses YAML I/O to +/// provide the convert between YAML and the normalized mach-o (NM). 
+/// +/// +------------+ +------+ +/// | normalized | <-> | yaml | +/// +------------+ +------+ + +#include "MachONormalizedFile.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/ReaderWriter/YamlContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <system_error> + + +using llvm::StringRef; +using namespace llvm::yaml; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; +using lld::YamlContext; + +LLVM_YAML_IS_SEQUENCE_VECTOR(Segment) +LLVM_YAML_IS_SEQUENCE_VECTOR(DependentDylib) +LLVM_YAML_IS_SEQUENCE_VECTOR(RebaseLocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(BindLocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(Export) +LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef) +LLVM_YAML_IS_SEQUENCE_VECTOR(DataInCode) + + +// for compatibility with gcc-4.7 in C++11 mode, add extra namespace +namespace llvm { +namespace yaml { + +// A vector of Sections is a sequence. +template<> +struct SequenceTraits< std::vector<Section> > { + static size_t size(IO &io, std::vector<Section> &seq) { + return seq.size(); + } + static Section& element(IO &io, std::vector<Section> &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +template<> +struct SequenceTraits< std::vector<Symbol> > { + static size_t size(IO &io, std::vector<Symbol> &seq) { + return seq.size(); + } + static Symbol& element(IO &io, std::vector<Symbol> &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +// A vector of Relocations is a sequence. 
+template<> +struct SequenceTraits< Relocations > { + static size_t size(IO &io, Relocations &seq) { + return seq.size(); + } + static Relocation& element(IO &io, Relocations &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +// The content for a section is represented as a flow sequence of hex bytes. +template<> +struct SequenceTraits< ContentBytes > { + static size_t size(IO &io, ContentBytes &seq) { + return seq.size(); + } + static Hex8& element(IO &io, ContentBytes &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } + static const bool flow = true; +}; + +// The indirect symbols for a section is represented as a flow sequence +// of numbers (symbol table indexes). +template<> +struct SequenceTraits< IndirectSymbols > { + static size_t size(IO &io, IndirectSymbols &seq) { + return seq.size(); + } + static uint32_t& element(IO &io, IndirectSymbols &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } + static const bool flow = true; +}; + +template <> +struct ScalarEnumerationTraits<lld::MachOLinkingContext::Arch> { + static void enumeration(IO &io, lld::MachOLinkingContext::Arch &value) { + io.enumCase(value, "unknown",lld::MachOLinkingContext::arch_unknown); + io.enumCase(value, "ppc", lld::MachOLinkingContext::arch_ppc); + io.enumCase(value, "x86", lld::MachOLinkingContext::arch_x86); + io.enumCase(value, "x86_64", lld::MachOLinkingContext::arch_x86_64); + io.enumCase(value, "armv6", lld::MachOLinkingContext::arch_armv6); + io.enumCase(value, "armv7", lld::MachOLinkingContext::arch_armv7); + io.enumCase(value, "armv7s", lld::MachOLinkingContext::arch_armv7s); + io.enumCase(value, "arm64", lld::MachOLinkingContext::arch_arm64); + } +}; + +template <> +struct ScalarEnumerationTraits<lld::MachOLinkingContext::OS> { + static void enumeration(IO &io, lld::MachOLinkingContext::OS &value) { + io.enumCase(value, "unknown", 
+ lld::MachOLinkingContext::OS::unknown); + io.enumCase(value, "Mac OS X", + lld::MachOLinkingContext::OS::macOSX); + io.enumCase(value, "iOS", + lld::MachOLinkingContext::OS::iOS); + io.enumCase(value, "iOS Simulator", + lld::MachOLinkingContext::OS::iOS_simulator); + } +}; + + +template <> +struct ScalarEnumerationTraits<HeaderFileType> { + static void enumeration(IO &io, HeaderFileType &value) { + io.enumCase(value, "MH_OBJECT", llvm::MachO::MH_OBJECT); + io.enumCase(value, "MH_DYLIB", llvm::MachO::MH_DYLIB); + io.enumCase(value, "MH_EXECUTE", llvm::MachO::MH_EXECUTE); + io.enumCase(value, "MH_BUNDLE", llvm::MachO::MH_BUNDLE); + } +}; + + +template <> +struct ScalarBitSetTraits<FileFlags> { + static void bitset(IO &io, FileFlags &value) { + io.bitSetCase(value, "MH_TWOLEVEL", + llvm::MachO::MH_TWOLEVEL); + io.bitSetCase(value, "MH_SUBSECTIONS_VIA_SYMBOLS", + llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + } +}; + + +template <> +struct ScalarEnumerationTraits<SectionType> { + static void enumeration(IO &io, SectionType &value) { + io.enumCase(value, "S_REGULAR", + llvm::MachO::S_REGULAR); + io.enumCase(value, "S_ZEROFILL", + llvm::MachO::S_ZEROFILL); + io.enumCase(value, "S_CSTRING_LITERALS", + llvm::MachO::S_CSTRING_LITERALS); + io.enumCase(value, "S_4BYTE_LITERALS", + llvm::MachO::S_4BYTE_LITERALS); + io.enumCase(value, "S_8BYTE_LITERALS", + llvm::MachO::S_8BYTE_LITERALS); + io.enumCase(value, "S_LITERAL_POINTERS", + llvm::MachO::S_LITERAL_POINTERS); + io.enumCase(value, "S_NON_LAZY_SYMBOL_POINTERS", + llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS); + io.enumCase(value, "S_LAZY_SYMBOL_POINTERS", + llvm::MachO::S_LAZY_SYMBOL_POINTERS); + io.enumCase(value, "S_SYMBOL_STUBS", + llvm::MachO::S_SYMBOL_STUBS); + io.enumCase(value, "S_MOD_INIT_FUNC_POINTERS", + llvm::MachO::S_MOD_INIT_FUNC_POINTERS); + io.enumCase(value, "S_MOD_TERM_FUNC_POINTERS", + llvm::MachO::S_MOD_TERM_FUNC_POINTERS); + io.enumCase(value, "S_COALESCED", + llvm::MachO::S_COALESCED); + io.enumCase(value, 
"S_GB_ZEROFILL", + llvm::MachO::S_GB_ZEROFILL); + io.enumCase(value, "S_INTERPOSING", + llvm::MachO::S_INTERPOSING); + io.enumCase(value, "S_16BYTE_LITERALS", + llvm::MachO::S_16BYTE_LITERALS); + io.enumCase(value, "S_DTRACE_DOF", + llvm::MachO::S_DTRACE_DOF); + io.enumCase(value, "S_LAZY_DYLIB_SYMBOL_POINTERS", + llvm::MachO::S_LAZY_DYLIB_SYMBOL_POINTERS); + io.enumCase(value, "S_THREAD_LOCAL_REGULAR", + llvm::MachO::S_THREAD_LOCAL_REGULAR); + io.enumCase(value, "S_THREAD_LOCAL_ZEROFILL", + llvm::MachO::S_THREAD_LOCAL_ZEROFILL); + io.enumCase(value, "S_THREAD_LOCAL_VARIABLES", + llvm::MachO::S_THREAD_LOCAL_VARIABLES); + io.enumCase(value, "S_THREAD_LOCAL_VARIABLE_POINTERS", + llvm::MachO::S_THREAD_LOCAL_VARIABLE_POINTERS); + io.enumCase(value, "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS", + llvm::MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + } +}; + +template <> +struct ScalarBitSetTraits<SectionAttr> { + static void bitset(IO &io, SectionAttr &value) { + io.bitSetCase(value, "S_ATTR_PURE_INSTRUCTIONS", + llvm::MachO::S_ATTR_PURE_INSTRUCTIONS); + io.bitSetCase(value, "S_ATTR_SOME_INSTRUCTIONS", + llvm::MachO::S_ATTR_SOME_INSTRUCTIONS); + io.bitSetCase(value, "S_ATTR_NO_DEAD_STRIP", + llvm::MachO::S_ATTR_NO_DEAD_STRIP); + io.bitSetCase(value, "S_ATTR_EXT_RELOC", + llvm::MachO::S_ATTR_EXT_RELOC); + io.bitSetCase(value, "S_ATTR_LOC_RELOC", + llvm::MachO::S_ATTR_LOC_RELOC); + io.bitSetCase(value, "S_ATTR_DEBUG", + llvm::MachO::S_ATTR_DEBUG); + } +}; + +/// This is a custom formatter for SectionAlignment. Values are +/// the power to raise by, ie, the n in 2^n. 
+template <> struct ScalarTraits<SectionAlignment> { + static void output(const SectionAlignment &value, void *ctxt, + raw_ostream &out) { + out << llvm::format("%d", (uint32_t)value); + } + + static StringRef input(StringRef scalar, void *ctxt, + SectionAlignment &value) { + uint32_t alignment; + if (scalar.getAsInteger(0, alignment)) { + return "malformed alignment value"; + } + if (!llvm::isPowerOf2_32(alignment)) + return "alignment must be a power of 2"; + value = alignment; + return StringRef(); // returning empty string means success + } + + static bool mustQuote(StringRef) { return false; } +}; + +template <> +struct ScalarEnumerationTraits<NListType> { + static void enumeration(IO &io, NListType &value) { + io.enumCase(value, "N_UNDF", llvm::MachO::N_UNDF); + io.enumCase(value, "N_ABS", llvm::MachO::N_ABS); + io.enumCase(value, "N_SECT", llvm::MachO::N_SECT); + io.enumCase(value, "N_PBUD", llvm::MachO::N_PBUD); + io.enumCase(value, "N_INDR", llvm::MachO::N_INDR); + } +}; + +template <> +struct ScalarBitSetTraits<SymbolScope> { + static void bitset(IO &io, SymbolScope &value) { + io.bitSetCase(value, "N_EXT", llvm::MachO::N_EXT); + io.bitSetCase(value, "N_PEXT", llvm::MachO::N_PEXT); + } +}; + +template <> +struct ScalarBitSetTraits<SymbolDesc> { + static void bitset(IO &io, SymbolDesc &value) { + io.bitSetCase(value, "N_NO_DEAD_STRIP", llvm::MachO::N_NO_DEAD_STRIP); + io.bitSetCase(value, "N_WEAK_REF", llvm::MachO::N_WEAK_REF); + io.bitSetCase(value, "N_WEAK_DEF", llvm::MachO::N_WEAK_DEF); + io.bitSetCase(value, "N_ARM_THUMB_DEF", llvm::MachO::N_ARM_THUMB_DEF); + io.bitSetCase(value, "N_SYMBOL_RESOLVER", llvm::MachO::N_SYMBOL_RESOLVER); + } +}; + + +template <> +struct MappingTraits<Section> { + struct NormalizedContentBytes; + static void mapping(IO &io, Section §) { + io.mapRequired("segment", sect.segmentName); + io.mapRequired("section", sect.sectionName); + io.mapRequired("type", sect.type); + io.mapOptional("attributes", sect.attributes); + 
io.mapOptional("alignment", sect.alignment, (SectionAlignment)1); + io.mapRequired("address", sect.address); + if (isZeroFillSection(sect.type)) { + // S_ZEROFILL sections use "size:" instead of "content:" + uint64_t size = sect.content.size(); + io.mapOptional("size", size); + if (!io.outputting()) { + uint8_t *bytes = nullptr; + sect.content = makeArrayRef(bytes, size); + } + } else { + MappingNormalization<NormalizedContent, ArrayRef<uint8_t>> content( + io, sect.content); + io.mapOptional("content", content->_normalizedContent); + } + io.mapOptional("relocations", sect.relocations); + io.mapOptional("indirect-syms", sect.indirectSymbols); + } + + struct NormalizedContent { + NormalizedContent(IO &io) : _io(io) {} + NormalizedContent(IO &io, ArrayRef<uint8_t> content) : _io(io) { + // When writing yaml, copy content byte array to Hex8 vector. + for (auto &c : content) { + _normalizedContent.push_back(c); + } + } + ArrayRef<uint8_t> denormalize(IO &io) { + // When reading yaml, allocate byte array owned by NormalizedFile and + // copy Hex8 vector to byte array. 
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *file = info->_normalizeMachOFile; + assert(file != nullptr); + size_t size = _normalizedContent.size(); + if (!size) + return None; + uint8_t *bytes = file->ownedAllocations.Allocate<uint8_t>(size); + std::copy(_normalizedContent.begin(), _normalizedContent.end(), bytes); + return makeArrayRef(bytes, size); + } + + IO &_io; + ContentBytes _normalizedContent; + }; +}; + + +template <> +struct MappingTraits<Relocation> { + static void mapping(IO &io, Relocation &reloc) { + io.mapRequired("offset", reloc.offset); + io.mapOptional("scattered", reloc.scattered, false); + io.mapRequired("type", reloc.type); + io.mapRequired("length", reloc.length); + io.mapRequired("pc-rel", reloc.pcRel); + if ( !reloc.scattered ) + io.mapRequired("extern", reloc.isExtern); + if ( reloc.scattered ) + io.mapRequired("value", reloc.value); + if ( !reloc.scattered ) + io.mapRequired("symbol", reloc.symbol); + } +}; + + +template <> +struct ScalarEnumerationTraits<RelocationInfoType> { + static void enumeration(IO &io, RelocationInfoType &value) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *file = info->_normalizeMachOFile; + assert(file != nullptr); + switch (file->arch) { + case lld::MachOLinkingContext::arch_x86_64: + io.enumCase(value, "X86_64_RELOC_UNSIGNED", + llvm::MachO::X86_64_RELOC_UNSIGNED); + io.enumCase(value, "X86_64_RELOC_SIGNED", + llvm::MachO::X86_64_RELOC_SIGNED); + io.enumCase(value, "X86_64_RELOC_BRANCH", + llvm::MachO::X86_64_RELOC_BRANCH); + io.enumCase(value, "X86_64_RELOC_GOT_LOAD", + llvm::MachO::X86_64_RELOC_GOT_LOAD); + io.enumCase(value, "X86_64_RELOC_GOT", + llvm::MachO::X86_64_RELOC_GOT); + io.enumCase(value, "X86_64_RELOC_SUBTRACTOR", + llvm::MachO::X86_64_RELOC_SUBTRACTOR); + io.enumCase(value, "X86_64_RELOC_SIGNED_1", + llvm::MachO::X86_64_RELOC_SIGNED_1); + 
io.enumCase(value, "X86_64_RELOC_SIGNED_2", + llvm::MachO::X86_64_RELOC_SIGNED_2); + io.enumCase(value, "X86_64_RELOC_SIGNED_4", + llvm::MachO::X86_64_RELOC_SIGNED_4); + io.enumCase(value, "X86_64_RELOC_TLV", + llvm::MachO::X86_64_RELOC_TLV); + break; + case lld::MachOLinkingContext::arch_x86: + io.enumCase(value, "GENERIC_RELOC_VANILLA", + llvm::MachO::GENERIC_RELOC_VANILLA); + io.enumCase(value, "GENERIC_RELOC_PAIR", + llvm::MachO::GENERIC_RELOC_PAIR); + io.enumCase(value, "GENERIC_RELOC_SECTDIFF", + llvm::MachO::GENERIC_RELOC_SECTDIFF); + io.enumCase(value, "GENERIC_RELOC_LOCAL_SECTDIFF", + llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF); + io.enumCase(value, "GENERIC_RELOC_TLV", + llvm::MachO::GENERIC_RELOC_TLV); + break; + case lld::MachOLinkingContext::arch_armv6: + case lld::MachOLinkingContext::arch_armv7: + case lld::MachOLinkingContext::arch_armv7s: + io.enumCase(value, "ARM_RELOC_VANILLA", + llvm::MachO::ARM_RELOC_VANILLA); + io.enumCase(value, "ARM_RELOC_PAIR", + llvm::MachO::ARM_RELOC_PAIR); + io.enumCase(value, "ARM_RELOC_SECTDIFF", + llvm::MachO::ARM_RELOC_SECTDIFF); + io.enumCase(value, "ARM_RELOC_LOCAL_SECTDIFF", + llvm::MachO::ARM_RELOC_LOCAL_SECTDIFF); + io.enumCase(value, "ARM_RELOC_BR24", + llvm::MachO::ARM_RELOC_BR24); + io.enumCase(value, "ARM_THUMB_RELOC_BR22", + llvm::MachO::ARM_THUMB_RELOC_BR22); + io.enumCase(value, "ARM_RELOC_HALF", + llvm::MachO::ARM_RELOC_HALF); + io.enumCase(value, "ARM_RELOC_HALF_SECTDIFF", + llvm::MachO::ARM_RELOC_HALF_SECTDIFF); + break; + case lld::MachOLinkingContext::arch_arm64: + io.enumCase(value, "ARM64_RELOC_UNSIGNED", + llvm::MachO::ARM64_RELOC_UNSIGNED); + io.enumCase(value, "ARM64_RELOC_SUBTRACTOR", + llvm::MachO::ARM64_RELOC_SUBTRACTOR); + io.enumCase(value, "ARM64_RELOC_BRANCH26", + llvm::MachO::ARM64_RELOC_BRANCH26); + io.enumCase(value, "ARM64_RELOC_PAGE21", + llvm::MachO::ARM64_RELOC_PAGE21); + io.enumCase(value, "ARM64_RELOC_PAGEOFF12", + llvm::MachO::ARM64_RELOC_PAGEOFF12); + io.enumCase(value, 
"ARM64_RELOC_GOT_LOAD_PAGE21", + llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGE21); + io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); + io.enumCase(value, "ARM64_RELOC_POINTER_TO_GOT", + llvm::MachO::ARM64_RELOC_POINTER_TO_GOT); + io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGE21", + llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); + io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", + llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); + io.enumCase(value, "ARM64_RELOC_ADDEND", + llvm::MachO::ARM64_RELOC_ADDEND); + break; + default: + llvm_unreachable("unknown architecture"); + } + } +}; + + +template <> +struct MappingTraits<Symbol> { + static void mapping(IO &io, Symbol& sym) { + io.mapRequired("name", sym.name); + io.mapRequired("type", sym.type); + io.mapOptional("scope", sym.scope, SymbolScope(0)); + io.mapOptional("sect", sym.sect, (uint8_t)0); + if (sym.type == llvm::MachO::N_UNDF) { + // In undef symbols, desc field contains alignment/ordinal info + // which is better represented as a hex vaule. + uint16_t t1 = sym.desc; + Hex16 t2 = t1; + io.mapOptional("desc", t2, Hex16(0)); + sym.desc = t2; + } else { + // In defined symbols, desc fit is a set of option bits. + io.mapOptional("desc", sym.desc, SymbolDesc(0)); + } + io.mapRequired("value", sym.value); + } +}; + +// Custom mapping for VMProtect (e.g. "r-x"). +template <> +struct ScalarTraits<VMProtect> { + static void output(const VMProtect &value, void*, raw_ostream &out) { + out << ( (value & llvm::MachO::VM_PROT_READ) ? 'r' : '-'); + out << ( (value & llvm::MachO::VM_PROT_WRITE) ? 'w' : '-'); + out << ( (value & llvm::MachO::VM_PROT_EXECUTE) ? 'x' : '-'); + } + static StringRef input(StringRef scalar, void*, VMProtect &value) { + value = 0; + if (scalar.size() != 3) + return "segment access protection must be three chars (e.g. 
\"r-x\")"; + switch (scalar[0]) { + case 'r': + value = llvm::MachO::VM_PROT_READ; + break; + case '-': + break; + default: + return "segment access protection first char must be 'r' or '-'"; + } + switch (scalar[1]) { + case 'w': + value = value | llvm::MachO::VM_PROT_WRITE; + break; + case '-': + break; + default: + return "segment access protection second char must be 'w' or '-'"; + } + switch (scalar[2]) { + case 'x': + value = value | llvm::MachO::VM_PROT_EXECUTE; + break; + case '-': + break; + default: + return "segment access protection third char must be 'x' or '-'"; + } + // Return the empty string on success, + return StringRef(); + } + static bool mustQuote(StringRef) { return false; } +}; + + +template <> +struct MappingTraits<Segment> { + static void mapping(IO &io, Segment& seg) { + io.mapRequired("name", seg.name); + io.mapRequired("address", seg.address); + io.mapRequired("size", seg.size); + io.mapRequired("init-access", seg.init_access); + io.mapRequired("max-access", seg.max_access); + } +}; + +template <> +struct ScalarEnumerationTraits<LoadCommandType> { + static void enumeration(IO &io, LoadCommandType &value) { + io.enumCase(value, "LC_LOAD_DYLIB", + llvm::MachO::LC_LOAD_DYLIB); + io.enumCase(value, "LC_LOAD_WEAK_DYLIB", + llvm::MachO::LC_LOAD_WEAK_DYLIB); + io.enumCase(value, "LC_REEXPORT_DYLIB", + llvm::MachO::LC_REEXPORT_DYLIB); + io.enumCase(value, "LC_LOAD_UPWARD_DYLIB", + llvm::MachO::LC_LOAD_UPWARD_DYLIB); + io.enumCase(value, "LC_LAZY_LOAD_DYLIB", + llvm::MachO::LC_LAZY_LOAD_DYLIB); + io.enumCase(value, "LC_VERSION_MIN_MACOSX", + llvm::MachO::LC_VERSION_MIN_MACOSX); + io.enumCase(value, "LC_VERSION_MIN_IPHONEOS", + llvm::MachO::LC_VERSION_MIN_IPHONEOS); + io.enumCase(value, "LC_VERSION_MIN_TVOS", + llvm::MachO::LC_VERSION_MIN_TVOS); + io.enumCase(value, "LC_VERSION_MIN_WATCHOS", + llvm::MachO::LC_VERSION_MIN_WATCHOS); + } +}; + +template <> +struct MappingTraits<DependentDylib> { + static void mapping(IO &io, DependentDylib& dylib) { 
+ io.mapRequired("path", dylib.path); + io.mapOptional("kind", dylib.kind, + llvm::MachO::LC_LOAD_DYLIB); + io.mapOptional("compat-version", dylib.compatVersion, + PackedVersion(0x10000)); + io.mapOptional("current-version", dylib.currentVersion, + PackedVersion(0x10000)); + } +}; + +template <> +struct ScalarEnumerationTraits<RebaseType> { + static void enumeration(IO &io, RebaseType &value) { + io.enumCase(value, "REBASE_TYPE_POINTER", + llvm::MachO::REBASE_TYPE_POINTER); + io.enumCase(value, "REBASE_TYPE_TEXT_PCREL32", + llvm::MachO::REBASE_TYPE_TEXT_PCREL32); + io.enumCase(value, "REBASE_TYPE_TEXT_ABSOLUTE32", + llvm::MachO::REBASE_TYPE_TEXT_ABSOLUTE32); + } +}; + + +template <> +struct MappingTraits<RebaseLocation> { + static void mapping(IO &io, RebaseLocation& rebase) { + io.mapRequired("segment-index", rebase.segIndex); + io.mapRequired("segment-offset", rebase.segOffset); + io.mapOptional("kind", rebase.kind, + llvm::MachO::REBASE_TYPE_POINTER); + } +}; + + + +template <> +struct ScalarEnumerationTraits<BindType> { + static void enumeration(IO &io, BindType &value) { + io.enumCase(value, "BIND_TYPE_POINTER", + llvm::MachO::BIND_TYPE_POINTER); + io.enumCase(value, "BIND_TYPE_TEXT_ABSOLUTE32", + llvm::MachO::BIND_TYPE_TEXT_ABSOLUTE32); + io.enumCase(value, "BIND_TYPE_TEXT_PCREL32", + llvm::MachO::BIND_TYPE_TEXT_PCREL32); + } +}; + +template <> +struct MappingTraits<BindLocation> { + static void mapping(IO &io, BindLocation &bind) { + io.mapRequired("segment-index", bind.segIndex); + io.mapRequired("segment-offset", bind.segOffset); + io.mapOptional("kind", bind.kind, + llvm::MachO::BIND_TYPE_POINTER); + io.mapOptional("can-be-null", bind.canBeNull, false); + io.mapRequired("ordinal", bind.ordinal); + io.mapRequired("symbol-name", bind.symbolName); + io.mapOptional("addend", bind.addend, Hex64(0)); + } +}; + + +template <> +struct ScalarEnumerationTraits<ExportSymbolKind> { + static void enumeration(IO &io, ExportSymbolKind &value) { + io.enumCase(value, 
"EXPORT_SYMBOL_FLAGS_KIND_REGULAR", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL); + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE); + } +}; + +template <> +struct ScalarBitSetTraits<ExportFlags> { + static void bitset(IO &io, ExportFlags &value) { + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION", + llvm::MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_REEXPORT", + llvm::MachO::EXPORT_SYMBOL_FLAGS_REEXPORT); + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER", + llvm::MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER); + } +}; + + +template <> +struct MappingTraits<Export> { + static void mapping(IO &io, Export &exp) { + io.mapRequired("name", exp.name); + io.mapOptional("offset", exp.offset); + io.mapOptional("kind", exp.kind, + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); + if (!io.outputting() || exp.flags) + io.mapOptional("flags", exp.flags); + io.mapOptional("other", exp.otherOffset, Hex32(0)); + io.mapOptional("other-name", exp.otherName, StringRef()); + } +}; + +template <> +struct ScalarEnumerationTraits<DataRegionType> { + static void enumeration(IO &io, DataRegionType &value) { + io.enumCase(value, "DICE_KIND_DATA", + llvm::MachO::DICE_KIND_DATA); + io.enumCase(value, "DICE_KIND_JUMP_TABLE8", + llvm::MachO::DICE_KIND_JUMP_TABLE8); + io.enumCase(value, "DICE_KIND_JUMP_TABLE16", + llvm::MachO::DICE_KIND_JUMP_TABLE16); + io.enumCase(value, "DICE_KIND_JUMP_TABLE32", + llvm::MachO::DICE_KIND_JUMP_TABLE32); + io.enumCase(value, "DICE_KIND_ABS_JUMP_TABLE32", + llvm::MachO::DICE_KIND_ABS_JUMP_TABLE32); + } +}; + +template <> +struct MappingTraits<DataInCode> { + static void mapping(IO &io, DataInCode &entry) { + io.mapRequired("offset", entry.offset); + io.mapRequired("length", entry.length); + io.mapRequired("kind", 
entry.kind); + } +}; + +template <> +struct ScalarTraits<PackedVersion> { + static void output(const PackedVersion &value, void*, raw_ostream &out) { + out << llvm::format("%d.%d", (value >> 16), (value >> 8) & 0xFF); + if (value & 0xFF) { + out << llvm::format(".%d", (value & 0xFF)); + } + } + static StringRef input(StringRef scalar, void*, PackedVersion &result) { + uint32_t value; + if (lld::MachOLinkingContext::parsePackedVersion(scalar, value)) + return "malformed version number"; + result = value; + // Return the empty string on success, + return StringRef(); + } + static bool mustQuote(StringRef) { return false; } +}; + +template <> +struct MappingTraits<NormalizedFile> { + static void mapping(IO &io, NormalizedFile &file) { + io.mapRequired("arch", file.arch); + io.mapRequired("file-type", file.fileType); + io.mapOptional("flags", file.flags); + io.mapOptional("dependents", file.dependentDylibs); + io.mapOptional("install-name", file.installName, StringRef()); + io.mapOptional("compat-version", file.compatVersion, PackedVersion(0x10000)); + io.mapOptional("current-version", file.currentVersion, PackedVersion(0x10000)); + io.mapOptional("has-UUID", file.hasUUID, true); + io.mapOptional("rpaths", file.rpaths); + io.mapOptional("entry-point", file.entryAddress, Hex64(0)); + io.mapOptional("stack-size", file.stackSize, Hex64(0)); + io.mapOptional("source-version", file.sourceVersion, Hex64(0)); + io.mapOptional("OS", file.os); + io.mapOptional("min-os-version", file.minOSverson, PackedVersion(0)); + io.mapOptional("min-os-version-kind", file.minOSVersionKind, (LoadCommandType)0); + io.mapOptional("sdk-version", file.sdkVersion, PackedVersion(0)); + io.mapOptional("segments", file.segments); + io.mapOptional("sections", file.sections); + io.mapOptional("local-symbols", file.localSymbols); + io.mapOptional("global-symbols", file.globalSymbols); + io.mapOptional("undefined-symbols",file.undefinedSymbols); + io.mapOptional("page-size", file.pageSize, Hex32(4096)); 
+ io.mapOptional("rebasings", file.rebasingInfo); + io.mapOptional("bindings", file.bindingInfo); + io.mapOptional("weak-bindings", file.weakBindingInfo); + io.mapOptional("lazy-bindings", file.lazyBindingInfo); + io.mapOptional("exports", file.exportInfo); + io.mapOptional("dataInCode", file.dataInCode); + } + static StringRef validate(IO &io, NormalizedFile &file) { + return StringRef(); + } +}; + +} // namespace llvm +} // namespace yaml + + +namespace lld { +namespace mach_o { + +/// Handles !mach-o tagged yaml documents. +bool MachOYamlIOTaggedDocumentHandler::handledDocTag(llvm::yaml::IO &io, + const lld::File *&file) const { + if (!io.mapTag("!mach-o")) + return false; + // Step 1: parse yaml into normalized mach-o struct. + NormalizedFile nf; + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_normalizeMachOFile == nullptr); + info->_normalizeMachOFile = &nf; + MappingTraits<NormalizedFile>::mapping(io, nf); + // Step 2: parse normalized mach-o struct into atoms. + auto fileOrError = normalizedToAtoms(nf, info->_path, true); + + // Check that we parsed successfully. + if (!fileOrError) { + std::string buffer; + llvm::raw_string_ostream stream(buffer); + handleAllErrors(fileOrError.takeError(), + [&](const llvm::ErrorInfoBase &EI) { + EI.log(stream); + stream << "\n"; + }); + io.setError(stream.str()); + return false; + } + + if (nf.arch != _arch) { + io.setError(Twine("file is wrong architecture. Expected (" + + MachOLinkingContext::nameFromArch(_arch) + + ") found (" + + MachOLinkingContext::nameFromArch(nf.arch) + + ")")); + return false; + } + info->_normalizeMachOFile = nullptr; + file = fileOrError->release(); + return true; +} + + + +namespace normalized { + +/// Parses a yaml encoded mach-o file to produce an in-memory normalized view. +llvm::Expected<std::unique_ptr<NormalizedFile>> +readYaml(std::unique_ptr<MemoryBuffer> &mb) { + // Make empty NormalizedFile. 
+ std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + + // Create YAML Input parser. + YamlContext yamlContext; + yamlContext._normalizeMachOFile = f.get(); + llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); + + // Fill NormalizedFile by parsing yaml. + yin >> *f; + + // Return error if there were parsing problems. + if (auto ec = yin.error()) + return llvm::make_error<GenericError>(Twine("YAML parsing error: ") + + ec.message()); + + // Hand ownership of instantiated NormalizedFile to caller. + return std::move(f); +} + + +/// Writes a yaml encoded mach-o files from an in-memory normalized view. +std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out) { + // YAML I/O is not const aware, so need to cast away ;-( + NormalizedFile *f = const_cast<NormalizedFile*>(&file); + + // Create yaml Output writer, using yaml options for context. + YamlContext yamlContext; + yamlContext._normalizeMachOFile = f; + llvm::yaml::Output yout(out, &yamlContext); + + // Stream out yaml. + yout << *f; + + return std::error_code(); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOPasses.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOPasses.h new file mode 100644 index 000000000000..cd01d4aa2c93 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOPasses.h @@ -0,0 +1,30 @@ +//===- lib/ReaderWriter/MachO/MachOPasses.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_PASSES_H +#define LLD_READER_WRITER_MACHO_PASSES_H + +#include "lld/Core/PassManager.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + +void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx); +void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx); +void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx); +void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx); +void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx); +void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx); +void addShimPass(PassManager &pm, const MachOLinkingContext &ctx); + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_PASSES_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp new file mode 100644 index 000000000000..4712d8ca969c --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp @@ -0,0 +1,128 @@ +//===- lib/ReaderWriter/MachO/ObjCPass.cpp -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +/// +/// ObjC Image Info Atom created by the ObjC pass. 
+/// +class ObjCImageInfoAtom : public SimpleDefinedAtom { +public: + ObjCImageInfoAtom(const File &file, + MachOLinkingContext::ObjCConstraint objCConstraint, + uint32_t swiftVersion) + : SimpleDefinedAtom(file) { + + Data.info.version = 0; + + switch (objCConstraint) { + case MachOLinkingContext::objc_unknown: + llvm_unreachable("Shouldn't run the objc pass without a constraint"); + case MachOLinkingContext::objc_supports_gc: + case MachOLinkingContext::objc_gc_only: + llvm_unreachable("GC is not supported"); + case MachOLinkingContext::objc_retainReleaseForSimulator: + // The retain/release for simulator flag is already the correct + // encoded value for the data so just set it here. + Data.info.flags = (uint32_t)objCConstraint; + break; + case MachOLinkingContext::objc_retainRelease: + // We don't need to encode this flag, so just leave the flags as 0. + Data.info.flags = 0; + break; + } + + Data.info.flags |= (swiftVersion << 8); + } + + ~ObjCImageInfoAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeObjCImageInfo; + } + + Alignment alignment() const override { + return 4; + } + + uint64_t size() const override { + return 8; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR__; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(Data.bytes, size()); + } + +private: + + struct objc_image_info { + uint32_t version; + uint32_t flags; + }; + + union { + objc_image_info info; + uint8_t bytes[8]; + } Data; +}; + +class ObjCPass : public Pass { +public: + ObjCPass(const MachOLinkingContext &context) + : _ctx(context), + _file(*_ctx.make_file<MachOFile>("<mach-o objc pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + + llvm::Error perform(SimpleFile &mergedFile) override { + // Add the image info. 
+ mergedFile.addAtom(*getImageInfo()); + + return llvm::Error::success(); + } + +private: + + const DefinedAtom* getImageInfo() { + return new (_file.allocator()) ObjCImageInfoAtom(_file, + _ctx.objcConstraint(), + _ctx.swiftVersion()); + } + + const MachOLinkingContext &_ctx; + MachOFile &_file; +}; + + + +void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(llvm::make_unique<ObjCPass>(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/SectCreateFile.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/SectCreateFile.h new file mode 100644 index 000000000000..49e65f63151d --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/SectCreateFile.h @@ -0,0 +1,102 @@ +//===---- lib/ReaderWriter/MachO/SectCreateFile.h ---------------*- c++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H +#define LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H + +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + +// +// A FlateNamespaceFile instance may be added as a resolution source of last +// resort, depending on how -flat_namespace and -undefined are set. 
+// +class SectCreateFile : public File { +public: + class SectCreateAtom : public SimpleDefinedAtom { + public: + SectCreateAtom(const File &file, StringRef segName, StringRef sectName, + std::unique_ptr<MemoryBuffer> content) + : SimpleDefinedAtom(file), + _combinedName((segName + "/" + sectName).str()), + _content(std::move(content)) {} + + ~SectCreateAtom() override = default; + + uint64_t size() const override { return _content->getBufferSize(); } + + Scope scope() const override { return scopeGlobal; } + + ContentType contentType() const override { return typeSectCreate; } + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + + StringRef customSectionName() const override { return _combinedName; } + + DeadStripKind deadStrip() const override { return deadStripNever; } + + ArrayRef<uint8_t> rawContent() const override { + const uint8_t *data = + reinterpret_cast<const uint8_t*>(_content->getBufferStart()); + return ArrayRef<uint8_t>(data, _content->getBufferSize()); + } + + StringRef segmentName() const { return _segName; } + StringRef sectionName() const { return _sectName; } + + private: + std::string _combinedName; + StringRef _segName; + StringRef _sectName; + std::unique_ptr<MemoryBuffer> _content; + }; + + SectCreateFile() : File("sectcreate", kindSectCreateObject) {} + + void addSection(StringRef seg, StringRef sect, + std::unique_ptr<MemoryBuffer> content) { + _definedAtoms.push_back( + new (allocator()) SectCreateAtom(*this, seg, sect, std::move(content))); + } + + const AtomRange<DefinedAtom> defined() const override { + return _definedAtoms; + } + + const AtomRange<UndefinedAtom> undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _definedAtoms.clear(); + 
_noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + +private: + AtomVector<DefinedAtom> _definedAtoms; +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp new file mode 100644 index 000000000000..ff559d70eabe --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp @@ -0,0 +1,129 @@ +//===- lib/ReaderWriter/MachO/ShimPass.cpp -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This linker pass updates branch-sites whose target is a different mode +// (thumb vs arm). +// +// Arm code has two instruction encodings thumb and arm. When branching from +// one code encoding to another, you need to use an instruction that switches +// the instruction mode. Usually the transition only happens at call sites, and +// the linker can transform a BL instruction in BLX (or vice versa). But if the +// compiler did a tail call optimization and a function ends with a branch (not +// branch and link), there is no pc-rel BX instruction. +// +// The ShimPass looks for pc-rel B instructions that will need to switch mode. +// For those cases it synthesizes a shim which does the transition, then +// modifies the original atom with the B instruction to target to the shim atom. 
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +class ShimPass : public Pass { +public: + ShimPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _stubInfo(_archHandler.stubInfo()), + _file(*_ctx.make_file<MachOFile>("<mach-o shim pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + + llvm::Error perform(SimpleFile &mergedFile) override { + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + // Look at non-call branches. + if (!_archHandler.isNonCallBranch(*ref)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + if (const lld::DefinedAtom *daTarget = dyn_cast<DefinedAtom>(target)) { + bool atomIsThumb = _archHandler.isThumbFunction(*atom); + bool targetIsThumb = _archHandler.isThumbFunction(*daTarget); + if (atomIsThumb != targetIsThumb) + updateBranchToUseShim(atomIsThumb, *daTarget, ref); + } + } + } + // Exit early if no shims needed. + if (_targetToShim.empty()) + return llvm::Error::success(); + + // Sort shim atoms so the layout order is stable. + std::vector<const DefinedAtom *> shims; + shims.reserve(_targetToShim.size()); + for (auto element : _targetToShim) { + shims.push_back(element.second); + } + std::sort(shims.begin(), shims.end(), + [](const DefinedAtom *l, const DefinedAtom *r) { + return (l->name() < r->name()); + }); + + // Add all shims to master file. 
+ for (const DefinedAtom *shim : shims) + mergedFile.addAtom(*shim); + + return llvm::Error::success(); + } + +private: + + void updateBranchToUseShim(bool thumbToArm, const DefinedAtom& target, + const Reference *ref) { + // Make file-format specific stub and other support atoms. + const DefinedAtom *shim = this->getShim(thumbToArm, target); + assert(shim != nullptr); + // Switch branch site to target shim atom. + const_cast<Reference *>(ref)->setTarget(shim); + } + + const DefinedAtom* getShim(bool thumbToArm, const DefinedAtom& target) { + auto pos = _targetToShim.find(&target); + if ( pos != _targetToShim.end() ) { + // Reuse an existing shim. + assert(pos->second != nullptr); + return pos->second; + } else { + // There is no existing shim, so create a new one. + const DefinedAtom *shim = _archHandler.createShim(_file, thumbToArm, + target); + _targetToShim[&target] = shim; + return shim; + } + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + const ArchHandler::StubInfo &_stubInfo; + MachOFile &_file; + llvm::DenseMap<const Atom*, const DefinedAtom*> _targetToShim; +}; + + + +void addShimPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(llvm::make_unique<ShimPass>(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp new file mode 100644 index 000000000000..19e2bc592f5c --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp @@ -0,0 +1,379 @@ +//===- lib/ReaderWriter/MachO/StubsPass.cpp ---------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This linker pass updates call-sites which have references to shared library +// atoms to instead have a reference to a stub (PLT entry) for the specified +// symbol. Each file format defines a subclass of StubsPass which implements +// the abstract methods for creating the file format specific StubAtoms. +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +namespace lld { +namespace mach_o { + +// +// Lazy Pointer Atom created by the stubs pass. +// +class LazyPointerAtom : public SimpleDefinedAtom { +public: + LazyPointerAtom(const File &file, bool is64) + : SimpleDefinedAtom(file), _is64(is64) { } + + ~LazyPointerAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeLazyPointer; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + +private: + const bool _is64; +}; + +// +// NonLazyPointer (GOT) Atom created by the stubs pass. 
+// +class NonLazyPointerAtom : public SimpleDefinedAtom { +public: + NonLazyPointerAtom(const File &file, bool is64, ContentType contentType) + : SimpleDefinedAtom(file), _is64(is64), _contentType(contentType) { } + + ~NonLazyPointerAtom() override = default; + + ContentType contentType() const override { + return _contentType; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + +private: + const bool _is64; + const ContentType _contentType; +}; + +// +// Stub Atom created by the stubs pass. +// +class StubAtom : public SimpleDefinedAtom { +public: + StubAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo){ } + + ~StubAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeStub; + } + + Alignment alignment() const override { + return 1 << _stubInfo.codeAlignment; + } + + uint64_t size() const override { + return _stubInfo.stubSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubBytes, _stubInfo.stubSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + +// +// Stub Helper Atom created by the stubs pass. 
+// +class StubHelperAtom : public SimpleDefinedAtom { +public: + StubHelperAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } + + ~StubHelperAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeStubHelper; + } + + Alignment alignment() const override { + return 1 << _stubInfo.codeAlignment; + } + + uint64_t size() const override { + return _stubInfo.stubHelperSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubHelperBytes, + _stubInfo.stubHelperSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + +// +// Stub Helper Common Atom created by the stubs pass. +// +class StubHelperCommonAtom : public SimpleDefinedAtom { +public: + StubHelperCommonAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } + + ~StubHelperCommonAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeStubHelper; + } + + Alignment alignment() const override { + return 1 << _stubInfo.stubHelperCommonAlignment; + } + + uint64_t size() const override { + return _stubInfo.stubHelperCommonSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubHelperCommonBytes, + _stubInfo.stubHelperCommonSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + +class StubsPass : public Pass { +public: + StubsPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _stubInfo(_archHandler.stubInfo()), + _file(*_ctx.make_file<MachOFile>("<mach-o Stubs pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + + llvm::Error perform(SimpleFile 
&mergedFile) override { + // Skip this pass if output format uses text relocations instead of stubs. + if (!this->noTextRelocs()) + return llvm::Error::success(); + + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + // Look at call-sites. + if (!this->isCallSite(*ref)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + if (isa<SharedLibraryAtom>(target)) { + // Calls to shared libraries go through stubs. + _targetToUses[target].push_back(ref); + continue; + } + const DefinedAtom *defTarget = dyn_cast<DefinedAtom>(target); + if (defTarget && defTarget->interposable() != DefinedAtom::interposeNo){ + // Calls to interposable functions in same linkage unit must also go + // through a stub. + assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); + _targetToUses[target].push_back(ref); + } + } + } + + // Exit early if no stubs needed. + if (_targetToUses.empty()) + return llvm::Error::success(); + + // First add help-common and GOT slots used by lazy binding. 
+ SimpleDefinedAtom *helperCommonAtom = + new (_file.allocator()) StubHelperCommonAtom(_file, _stubInfo); + SimpleDefinedAtom *helperCacheNLPAtom = + new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(), + _stubInfo.stubHelperImageCacheContentType); + SimpleDefinedAtom *helperBinderNLPAtom = + new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(), + _stubInfo.stubHelperImageCacheContentType); + addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, + helperCacheNLPAtom); + addOptReference( + helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, + _stubInfo.optStubHelperCommonReferenceToCache, helperCacheNLPAtom); + addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, + helperBinderNLPAtom); + addOptReference( + helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, + _stubInfo.optStubHelperCommonReferenceToBinder, helperBinderNLPAtom); + mergedFile.addAtom(*helperCommonAtom); + mergedFile.addAtom(*helperBinderNLPAtom); + mergedFile.addAtom(*helperCacheNLPAtom); + + // Add reference to dyld_stub_binder in libSystem.dylib + auto I = std::find_if( + mergedFile.sharedLibrary().begin(), mergedFile.sharedLibrary().end(), + [&](const SharedLibraryAtom *atom) { + return atom->name().equals(_stubInfo.binderSymbolName); + }); + assert(I != mergedFile.sharedLibrary().end() && + "dyld_stub_binder not found"); + addReference(helperBinderNLPAtom, _stubInfo.nonLazyPointerReferenceToBinder, *I); + + // Sort targets by name, so stubs and lazy pointers are consistent + std::vector<const Atom *> targetsNeedingStubs; + for (auto it : _targetToUses) + targetsNeedingStubs.push_back(it.first); + std::sort(targetsNeedingStubs.begin(), targetsNeedingStubs.end(), + [](const Atom * left, const Atom * right) { + return (left->name().compare(right->name()) < 0); + }); + + // Make and append stubs, lazy pointers, and helpers in alphabetical order. 
+ unsigned lazyOffset = 0; + for (const Atom *target : targetsNeedingStubs) { + auto *stub = new (_file.allocator()) StubAtom(_file, _stubInfo); + auto *lp = + new (_file.allocator()) LazyPointerAtom(_file, _ctx.is64Bit()); + auto *helper = new (_file.allocator()) StubHelperAtom(_file, _stubInfo); + + addReference(stub, _stubInfo.stubReferenceToLP, lp); + addOptReference(stub, _stubInfo.stubReferenceToLP, + _stubInfo.optStubReferenceToLP, lp); + addReference(lp, _stubInfo.lazyPointerReferenceToHelper, helper); + addReference(lp, _stubInfo.lazyPointerReferenceToFinal, target); + addReference(helper, _stubInfo.stubHelperReferenceToImm, helper); + addReferenceAddend(helper, _stubInfo.stubHelperReferenceToImm, helper, + lazyOffset); + addReference(helper, _stubInfo.stubHelperReferenceToHelperCommon, + helperCommonAtom); + + mergedFile.addAtom(*stub); + mergedFile.addAtom(*lp); + mergedFile.addAtom(*helper); + + // Update each reference to use stub. + for (const Reference *ref : _targetToUses[target]) { + assert(ref->target() == target); + // Switch call site to reference stub atom instead. 
+ const_cast<Reference *>(ref)->setTarget(stub); + } + + // Calculate new offset + lazyOffset += target->name().size() + 12; + } + + return llvm::Error::success(); + } + +private: + bool noTextRelocs() { + return true; + } + + bool isCallSite(const Reference &ref) { + return _archHandler.isCallSite(ref); + } + + void addReference(SimpleDefinedAtom* atom, + const ArchHandler::ReferenceInfo &refInfo, + const lld::Atom* target) { + atom->addReference(Reference::KindNamespace::mach_o, + refInfo.arch, refInfo.kind, refInfo.offset, + target, refInfo.addend); + } + + void addReferenceAddend(SimpleDefinedAtom *atom, + const ArchHandler::ReferenceInfo &refInfo, + const lld::Atom *target, uint64_t addend) { + atom->addReference(Reference::KindNamespace::mach_o, refInfo.arch, + refInfo.kind, refInfo.offset, target, addend); + } + + void addOptReference(SimpleDefinedAtom* atom, + const ArchHandler::ReferenceInfo &refInfo, + const ArchHandler::OptionalRefInfo &optRef, + const lld::Atom* target) { + if (!optRef.used) + return; + atom->addReference(Reference::KindNamespace::mach_o, + refInfo.arch, optRef.kind, optRef.offset, + target, optRef.addend); + } + + typedef llvm::DenseMap<const Atom*, + llvm::SmallVector<const Reference *, 8>> TargetToUses; + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + const ArchHandler::StubInfo &_stubInfo; + MachOFile &_file; + TargetToUses _targetToUses; +}; + +void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(std::unique_ptr<Pass>(new StubsPass(ctx))); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp new file mode 100644 index 000000000000..e362e507ebf2 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp @@ -0,0 +1,141 @@ +//===- lib/ReaderWriter/MachO/TLVPass.cpp -----------------------*- C++ -*-===// +// +// The LLVM Linker +// 
+// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This linker pass transforms all TLV references to real references. +/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" + +namespace lld { +namespace mach_o { + +// +// TLVP Entry Atom created by the TLV pass. +// +class TLVPEntryAtom : public SimpleDefinedAtom { +public: + TLVPEntryAtom(const File &file, bool is64, StringRef name) + : SimpleDefinedAtom(file), _is64(is64), _name(name) {} + + ~TLVPEntryAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeTLVInitializerPtr; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 
8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + + StringRef slotName() const { + return _name; + } + +private: + const bool _is64; + StringRef _name; +}; + +class TLVPass : public Pass { +public: + TLVPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _file(*_ctx.make_file<MachOFile>("<mach-o TLV pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + +private: + llvm::Error perform(SimpleFile &mergedFile) override { + bool allowTLV = _ctx.minOS("10.7", "1.0"); + + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + if (!_archHandler.isTLVAccess(*ref)) + continue; + + if (!allowTLV) + return llvm::make_error<GenericError>( + "targeted OS version does not support use of thread local " + "variables in " + atom->name() + " for architecture " + + _ctx.archName()); + + const Atom *target = ref->target(); + assert(target != nullptr); + + const DefinedAtom *tlvpEntry = makeTLVPEntry(target); + const_cast<Reference*>(ref)->setTarget(tlvpEntry); + _archHandler.updateReferenceToTLV(ref); + } + } + + std::vector<const TLVPEntryAtom*> entries; + entries.reserve(_targetToTLVP.size()); + for (auto &it : _targetToTLVP) + entries.push_back(it.second); + std::sort(entries.begin(), entries.end(), + [](const TLVPEntryAtom *lhs, const TLVPEntryAtom *rhs) { + return (lhs->slotName().compare(rhs->slotName()) < 0); + }); + + for (const TLVPEntryAtom *slot : entries) + mergedFile.addAtom(*slot); + + return llvm::Error::success(); + } + + const DefinedAtom *makeTLVPEntry(const Atom *target) { + auto pos = _targetToTLVP.find(target); + + if (pos != _targetToTLVP.end()) + return pos->second; + + auto *tlvpEntry = new (_file.allocator()) + 
TLVPEntryAtom(_file, _ctx.is64Bit(), target->name()); + _targetToTLVP[target] = tlvpEntry; + const ArchHandler::ReferenceInfo &nlInfo = + _archHandler.stubInfo().nonLazyPointerReferenceToBinder; + tlvpEntry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch, + nlInfo.kind, 0, target, 0); + return tlvpEntry; + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + MachOFile &_file; + llvm::DenseMap<const Atom*, const TLVPEntryAtom*> _targetToTLVP; +}; + +void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsTLVPass()); + pm.add(llvm::make_unique<TLVPass>(ctx)); +} + +} // end namesapce mach_o +} // end namesapce lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/WriterMachO.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/WriterMachO.cpp new file mode 100644 index 000000000000..f08487f21ac1 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/WriterMachO.cpp @@ -0,0 +1,71 @@ +//===- lib/ReaderWriter/MachO/WriterMachO.cpp -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ExecutableAtoms.h" +#include "MachONormalizedFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/raw_ostream.h" +#include <system_error> + +using lld::mach_o::normalized::NormalizedFile; + +namespace lld { +namespace mach_o { + +class MachOWriter : public Writer { +public: + MachOWriter(const MachOLinkingContext &ctxt) : _ctx(ctxt) {} + + llvm::Error writeFile(const lld::File &file, StringRef path) override { + // Construct empty normalized file from atoms. + llvm::Expected<std::unique_ptr<NormalizedFile>> nFile = + normalized::normalizedFromAtoms(file, _ctx); + if (auto ec = nFile.takeError()) + return ec; + + // For testing, write out yaml form of normalized file. + if (_ctx.printAtoms()) { + std::unique_ptr<Writer> yamlWriter = createWriterYAML(_ctx); + if (auto ec = yamlWriter->writeFile(file, "-")) + return ec; + } + + // Write normalized file as mach-o binary. + return writeBinary(*nFile->get(), path); + } + + void createImplicitFiles(std::vector<std::unique_ptr<File>> &r) override { + // When building main executables, add _main as required entry point. + if (_ctx.outputTypeHasEntry()) + r.emplace_back(new CEntryFile(_ctx)); + // If this can link with dylibs, need helper function (dyld_stub_binder). + if (_ctx.needsStubsPass()) + r.emplace_back(new StubHelperFile(_ctx)); + // Final linked images can access a symbol for their mach_header. 
+ if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + r.emplace_back(new MachHeaderAliasFile(_ctx)); + } +private: + const MachOLinkingContext &_ctx; + }; + + +} // namespace mach_o + +std::unique_ptr<Writer> createWriterMachO(const MachOLinkingContext &context) { + return std::unique_ptr<Writer>(new lld::mach_o::MachOWriter(context)); +} + +} // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/CMakeLists.txt b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/CMakeLists.txt new file mode 100644 index 000000000000..0e63574a63d2 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/CMakeLists.txt @@ -0,0 +1,9 @@ +add_lld_library(lldYAML + ReaderWriterYAML.cpp + + LINK_COMPONENTS + Support + + LINK_LIBS + lldCore + ) diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp new file mode 100644 index 000000000000..59ca43079a6d --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp @@ -0,0 +1,1402 @@ +//===- lib/ReaderWriter/YAML/ReaderWriterYAML.cpp -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/Error.h" +#include "lld/Core/File.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Reference.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/Simple.h" +#include "lld/Core/UndefinedAtom.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/YamlContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <cstring> +#include <memory> +#include <string> +#include <system_error> +#include <vector> + +using llvm::yaml::MappingTraits; +using llvm::yaml::ScalarEnumerationTraits; +using llvm::yaml::ScalarTraits; +using llvm::yaml::IO; +using llvm::yaml::SequenceTraits; +using llvm::yaml::DocumentListTraits; + +using namespace lld; + +/// The conversion of Atoms to and from YAML uses LLVM's YAML I/O. This +/// file just defines template specializations on the lld types which control +/// how the mapping is done to and from YAML. + +namespace { + +/// Used when writing yaml files. +/// In most cases, atoms names are unambiguous, so references can just +/// use the atom name as the target (e.g. target: foo). But in a few +/// cases that does not work, so ref-names are added. These are labels +/// used only in yaml. The labels do not exist in the Atom model. 
+/// +/// One need for ref-names are when atoms have no user supplied name +/// (e.g. c-string literal). Another case is when two object files with +/// identically named static functions are merged (ld -r) into one object file. +/// In that case referencing the function by name is ambiguous, so a unique +/// ref-name is added. +class RefNameBuilder { +public: + RefNameBuilder(const lld::File &file) + : _collisionCount(0), _unnamedCounter(0) { + // visit all atoms + for (const lld::DefinedAtom *atom : file.defined()) { + // Build map of atoms names to detect duplicates + if (!atom->name().empty()) + buildDuplicateNameMap(*atom); + + // Find references to unnamed atoms and create ref-names for them. + for (const lld::Reference *ref : *atom) { + // create refname for any unnamed reference target + const lld::Atom *target = ref->target(); + if ((target != nullptr) && target->name().empty()) { + std::string storage; + llvm::raw_string_ostream buffer(storage); + buffer << llvm::format("L%03d", _unnamedCounter++); + StringRef newName = copyString(buffer.str()); + _refNames[target] = newName; + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "unnamed atom: creating ref-name: '" + << newName << "' (" + << (const void *)newName.data() << ", " + << newName.size() << ")\n"); + } + } + } + for (const lld::UndefinedAtom *undefAtom : file.undefined()) { + buildDuplicateNameMap(*undefAtom); + } + for (const lld::SharedLibraryAtom *shlibAtom : file.sharedLibrary()) { + buildDuplicateNameMap(*shlibAtom); + } + for (const lld::AbsoluteAtom *absAtom : file.absolute()) { + if (!absAtom->name().empty()) + buildDuplicateNameMap(*absAtom); + } + } + + void buildDuplicateNameMap(const lld::Atom &atom) { + assert(!atom.name().empty()); + NameToAtom::iterator pos = _nameMap.find(atom.name()); + if (pos != _nameMap.end()) { + // Found name collision, give each a unique ref-name. 
+ std::string Storage; + llvm::raw_string_ostream buffer(Storage); + buffer << atom.name() << llvm::format(".%03d", ++_collisionCount); + StringRef newName = copyString(buffer.str()); + _refNames[&atom] = newName; + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "name collsion: creating ref-name: '" + << newName << "' (" + << (const void *)newName.data() + << ", " << newName.size() << ")\n"); + const lld::Atom *prevAtom = pos->second; + AtomToRefName::iterator pos2 = _refNames.find(prevAtom); + if (pos2 == _refNames.end()) { + // Only create ref-name for previous if none already created. + std::string Storage2; + llvm::raw_string_ostream buffer2(Storage2); + buffer2 << prevAtom->name() << llvm::format(".%03d", ++_collisionCount); + StringRef newName2 = copyString(buffer2.str()); + _refNames[prevAtom] = newName2; + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "name collsion: creating ref-name: '" + << newName2 << "' (" + << (const void *)newName2.data() << ", " + << newName2.size() << ")\n"); + } + } else { + // First time we've seen this name, just add it to map. + _nameMap[atom.name()] = &atom; + DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() + << "atom name seen for first time: '" + << atom.name() << "' (" + << (const void *)atom.name().data() + << ", " << atom.name().size() << ")\n"); + } + } + + bool hasRefName(const lld::Atom *atom) { return _refNames.count(atom); } + + StringRef refName(const lld::Atom *atom) { + return _refNames.find(atom)->second; + } + +private: + typedef llvm::StringMap<const lld::Atom *> NameToAtom; + typedef llvm::DenseMap<const lld::Atom *, std::string> AtomToRefName; + + // Allocate a new copy of this string in _storage, so the strings + // can be freed when RefNameBuilder is destroyed. 
+ StringRef copyString(StringRef str) { + char *s = _storage.Allocate<char>(str.size()); + memcpy(s, str.data(), str.size()); + return StringRef(s, str.size()); + } + + unsigned int _collisionCount; + unsigned int _unnamedCounter; + NameToAtom _nameMap; + AtomToRefName _refNames; + llvm::BumpPtrAllocator _storage; +}; + +/// Used when reading yaml files to find the target of a reference +/// that could be a name or ref-name. +class RefNameResolver { +public: + RefNameResolver(const lld::File *file, IO &io); + + const lld::Atom *lookup(StringRef name) const { + NameToAtom::const_iterator pos = _nameMap.find(name); + if (pos != _nameMap.end()) + return pos->second; + _io.setError(Twine("no such atom name: ") + name); + return nullptr; + } + +private: + typedef llvm::StringMap<const lld::Atom *> NameToAtom; + + void add(StringRef name, const lld::Atom *atom) { + if (_nameMap.count(name)) { + _io.setError(Twine("duplicate atom name: ") + name); + } else { + _nameMap[name] = atom; + } + } + + IO &_io; + NameToAtom _nameMap; +}; + +/// Mapping of Atoms. +template <typename T> class AtomList { + using Ty = std::vector<OwningAtomPtr<T>>; + +public: + typename Ty::iterator begin() { return _atoms.begin(); } + typename Ty::iterator end() { return _atoms.end(); } + Ty _atoms; +}; + +/// Mapping of kind: field in yaml files. +enum FileKinds { + fileKindObjectAtoms, // atom based object file encoded in yaml + fileKindArchive, // static archive library encoded in yaml + fileKindObjectMachO // mach-o object files encoded in yaml +}; + +struct ArchMember { + FileKinds _kind; + StringRef _name; + const lld::File *_content; +}; + +// The content bytes in a DefinedAtom are just uint8_t but we want +// special formatting, so define a strong type. +LLVM_YAML_STRONG_TYPEDEF(uint8_t, ImplicitHex8) + +// SharedLibraryAtoms have a bool canBeNull() method which we'd like to be +// more readable than just true/false. 
+LLVM_YAML_STRONG_TYPEDEF(bool, ShlibCanBeNull) + +// lld::Reference::Kind is a tuple of <namespace, arch, value>. +// For yaml, we just want one string that encapsulates the tuple. +struct RefKind { + Reference::KindNamespace ns; + Reference::KindArch arch; + Reference::KindValue value; +}; + +} // end anonymous namespace + +LLVM_YAML_IS_SEQUENCE_VECTOR(ArchMember) +LLVM_YAML_IS_SEQUENCE_VECTOR(const lld::Reference *) +// Always write DefinedAtoms content bytes as a flow sequence. +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(ImplicitHex8) + +// for compatibility with gcc-4.7 in C++11 mode, add extra namespace +namespace llvm { +namespace yaml { + +// This is a custom formatter for RefKind +template <> struct ScalarTraits<RefKind> { + static void output(const RefKind &kind, void *ctxt, raw_ostream &out) { + assert(ctxt != nullptr); + YamlContext *info = reinterpret_cast<YamlContext *>(ctxt); + assert(info->_registry); + StringRef str; + if (info->_registry->referenceKindToString(kind.ns, kind.arch, kind.value, + str)) + out << str; + else + out << (int)(kind.ns) << "-" << (int)(kind.arch) << "-" << kind.value; + } + + static StringRef input(StringRef scalar, void *ctxt, RefKind &kind) { + assert(ctxt != nullptr); + YamlContext *info = reinterpret_cast<YamlContext *>(ctxt); + assert(info->_registry); + if (info->_registry->referenceKindFromString(scalar, kind.ns, kind.arch, + kind.value)) + return StringRef(); + return StringRef("unknown reference kind"); + } + + static bool mustQuote(StringRef) { return false; } +}; + +template <> struct ScalarEnumerationTraits<lld::File::Kind> { + static void enumeration(IO &io, lld::File::Kind &value) { + io.enumCase(value, "error-object", lld::File::kindErrorObject); + io.enumCase(value, "object", lld::File::kindMachObject); + io.enumCase(value, "shared-library", lld::File::kindSharedLibrary); + io.enumCase(value, "static-library", lld::File::kindArchiveLibrary); + } +}; + +template <> struct ScalarEnumerationTraits<lld::Atom::Scope> { + 
static void enumeration(IO &io, lld::Atom::Scope &value) { + io.enumCase(value, "global", lld::Atom::scopeGlobal); + io.enumCase(value, "hidden", lld::Atom::scopeLinkageUnit); + io.enumCase(value, "static", lld::Atom::scopeTranslationUnit); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::SectionChoice> { + static void enumeration(IO &io, lld::DefinedAtom::SectionChoice &value) { + io.enumCase(value, "content", lld::DefinedAtom::sectionBasedOnContent); + io.enumCase(value, "custom", lld::DefinedAtom::sectionCustomPreferred); + io.enumCase(value, "custom-required", + lld::DefinedAtom::sectionCustomRequired); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::Interposable> { + static void enumeration(IO &io, lld::DefinedAtom::Interposable &value) { + io.enumCase(value, "no", DefinedAtom::interposeNo); + io.enumCase(value, "yes", DefinedAtom::interposeYes); + io.enumCase(value, "yes-and-weak", DefinedAtom::interposeYesAndRuntimeWeak); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::Merge> { + static void enumeration(IO &io, lld::DefinedAtom::Merge &value) { + io.enumCase(value, "no", lld::DefinedAtom::mergeNo); + io.enumCase(value, "as-tentative", lld::DefinedAtom::mergeAsTentative); + io.enumCase(value, "as-weak", lld::DefinedAtom::mergeAsWeak); + io.enumCase(value, "as-addressed-weak", + lld::DefinedAtom::mergeAsWeakAndAddressUsed); + io.enumCase(value, "by-content", lld::DefinedAtom::mergeByContent); + io.enumCase(value, "same-name-and-size", + lld::DefinedAtom::mergeSameNameAndSize); + io.enumCase(value, "largest", lld::DefinedAtom::mergeByLargestSection); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::DeadStripKind> { + static void enumeration(IO &io, lld::DefinedAtom::DeadStripKind &value) { + io.enumCase(value, "normal", lld::DefinedAtom::deadStripNormal); + io.enumCase(value, "never", lld::DefinedAtom::deadStripNever); + io.enumCase(value, "always", 
lld::DefinedAtom::deadStripAlways); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::DynamicExport> { + static void enumeration(IO &io, lld::DefinedAtom::DynamicExport &value) { + io.enumCase(value, "normal", lld::DefinedAtom::dynamicExportNormal); + io.enumCase(value, "always", lld::DefinedAtom::dynamicExportAlways); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::CodeModel> { + static void enumeration(IO &io, lld::DefinedAtom::CodeModel &value) { + io.enumCase(value, "none", lld::DefinedAtom::codeNA); + io.enumCase(value, "mips-pic", lld::DefinedAtom::codeMipsPIC); + io.enumCase(value, "mips-micro", lld::DefinedAtom::codeMipsMicro); + io.enumCase(value, "mips-micro-pic", lld::DefinedAtom::codeMipsMicroPIC); + io.enumCase(value, "mips-16", lld::DefinedAtom::codeMips16); + io.enumCase(value, "arm-thumb", lld::DefinedAtom::codeARMThumb); + io.enumCase(value, "arm-a", lld::DefinedAtom::codeARM_a); + io.enumCase(value, "arm-d", lld::DefinedAtom::codeARM_d); + io.enumCase(value, "arm-t", lld::DefinedAtom::codeARM_t); + } +}; + +template <> +struct ScalarEnumerationTraits<lld::DefinedAtom::ContentPermissions> { + static void enumeration(IO &io, lld::DefinedAtom::ContentPermissions &value) { + io.enumCase(value, "---", lld::DefinedAtom::perm___); + io.enumCase(value, "r--", lld::DefinedAtom::permR__); + io.enumCase(value, "r-x", lld::DefinedAtom::permR_X); + io.enumCase(value, "rw-", lld::DefinedAtom::permRW_); + io.enumCase(value, "rwx", lld::DefinedAtom::permRWX); + io.enumCase(value, "rw-l", lld::DefinedAtom::permRW_L); + io.enumCase(value, "unknown", lld::DefinedAtom::permUnknown); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::ContentType> { + static void enumeration(IO &io, lld::DefinedAtom::ContentType &value) { + io.enumCase(value, "unknown", DefinedAtom::typeUnknown); + io.enumCase(value, "code", DefinedAtom::typeCode); + io.enumCase(value, "stub", DefinedAtom::typeStub); + 
io.enumCase(value, "constant", DefinedAtom::typeConstant); + io.enumCase(value, "data", DefinedAtom::typeData); + io.enumCase(value, "quick-data", DefinedAtom::typeDataFast); + io.enumCase(value, "zero-fill", DefinedAtom::typeZeroFill); + io.enumCase(value, "zero-fill-quick", DefinedAtom::typeZeroFillFast); + io.enumCase(value, "const-data", DefinedAtom::typeConstData); + io.enumCase(value, "got", DefinedAtom::typeGOT); + io.enumCase(value, "resolver", DefinedAtom::typeResolver); + io.enumCase(value, "branch-island", DefinedAtom::typeBranchIsland); + io.enumCase(value, "branch-shim", DefinedAtom::typeBranchShim); + io.enumCase(value, "stub-helper", DefinedAtom::typeStubHelper); + io.enumCase(value, "c-string", DefinedAtom::typeCString); + io.enumCase(value, "utf16-string", DefinedAtom::typeUTF16String); + io.enumCase(value, "unwind-cfi", DefinedAtom::typeCFI); + io.enumCase(value, "unwind-lsda", DefinedAtom::typeLSDA); + io.enumCase(value, "const-4-byte", DefinedAtom::typeLiteral4); + io.enumCase(value, "const-8-byte", DefinedAtom::typeLiteral8); + io.enumCase(value, "const-16-byte", DefinedAtom::typeLiteral16); + io.enumCase(value, "lazy-pointer", DefinedAtom::typeLazyPointer); + io.enumCase(value, "lazy-dylib-pointer", + DefinedAtom::typeLazyDylibPointer); + io.enumCase(value, "cfstring", DefinedAtom::typeCFString); + io.enumCase(value, "initializer-pointer", + DefinedAtom::typeInitializerPtr); + io.enumCase(value, "terminator-pointer", + DefinedAtom::typeTerminatorPtr); + io.enumCase(value, "c-string-pointer",DefinedAtom::typeCStringPtr); + io.enumCase(value, "objc-class-pointer", + DefinedAtom::typeObjCClassPtr); + io.enumCase(value, "objc-category-list", + DefinedAtom::typeObjC2CategoryList); + io.enumCase(value, "objc-image-info", + DefinedAtom::typeObjCImageInfo); + io.enumCase(value, "objc-method-list", + DefinedAtom::typeObjCMethodList); + io.enumCase(value, "objc-class1", DefinedAtom::typeObjC1Class); + io.enumCase(value, "dtraceDOF", 
DefinedAtom::typeDTraceDOF); + io.enumCase(value, "interposing-tuples", + DefinedAtom::typeInterposingTuples); + io.enumCase(value, "lto-temp", DefinedAtom::typeTempLTO); + io.enumCase(value, "compact-unwind", DefinedAtom::typeCompactUnwindInfo); + io.enumCase(value, "unwind-info", DefinedAtom::typeProcessedUnwindInfo); + io.enumCase(value, "tlv-thunk", DefinedAtom::typeThunkTLV); + io.enumCase(value, "tlv-data", DefinedAtom::typeTLVInitialData); + io.enumCase(value, "tlv-zero-fill", DefinedAtom::typeTLVInitialZeroFill); + io.enumCase(value, "tlv-initializer-ptr", + DefinedAtom::typeTLVInitializerPtr); + io.enumCase(value, "mach_header", DefinedAtom::typeMachHeader); + io.enumCase(value, "dso_handle", DefinedAtom::typeDSOHandle); + io.enumCase(value, "sectcreate", DefinedAtom::typeSectCreate); + } +}; + +template <> struct ScalarEnumerationTraits<lld::UndefinedAtom::CanBeNull> { + static void enumeration(IO &io, lld::UndefinedAtom::CanBeNull &value) { + io.enumCase(value, "never", lld::UndefinedAtom::canBeNullNever); + io.enumCase(value, "at-runtime", lld::UndefinedAtom::canBeNullAtRuntime); + io.enumCase(value, "at-buildtime",lld::UndefinedAtom::canBeNullAtBuildtime); + } +}; + +template <> struct ScalarEnumerationTraits<ShlibCanBeNull> { + static void enumeration(IO &io, ShlibCanBeNull &value) { + io.enumCase(value, "never", false); + io.enumCase(value, "at-runtime", true); + } +}; + +template <> +struct ScalarEnumerationTraits<lld::SharedLibraryAtom::Type> { + static void enumeration(IO &io, lld::SharedLibraryAtom::Type &value) { + io.enumCase(value, "code", lld::SharedLibraryAtom::Type::Code); + io.enumCase(value, "data", lld::SharedLibraryAtom::Type::Data); + io.enumCase(value, "unknown", lld::SharedLibraryAtom::Type::Unknown); + } +}; + +/// This is a custom formatter for lld::DefinedAtom::Alignment. 
Values look +/// like: +/// 8 # 8-byte aligned +/// 7 mod 16 # 16-byte aligned plus 7 bytes +template <> struct ScalarTraits<lld::DefinedAtom::Alignment> { + static void output(const lld::DefinedAtom::Alignment &value, void *ctxt, + raw_ostream &out) { + if (value.modulus == 0) { + out << llvm::format("%d", value.value); + } else { + out << llvm::format("%d mod %d", value.modulus, value.value); + } + } + + static StringRef input(StringRef scalar, void *ctxt, + lld::DefinedAtom::Alignment &value) { + value.modulus = 0; + size_t modStart = scalar.find("mod"); + if (modStart != StringRef::npos) { + StringRef modStr = scalar.slice(0, modStart); + modStr = modStr.rtrim(); + unsigned int modulus; + if (modStr.getAsInteger(0, modulus)) { + return "malformed alignment modulus"; + } + value.modulus = modulus; + scalar = scalar.drop_front(modStart + 3); + scalar = scalar.ltrim(); + } + unsigned int power; + if (scalar.getAsInteger(0, power)) { + return "malformed alignment power"; + } + value.value = power; + if (value.modulus >= power) { + return "malformed alignment, modulus too large for power"; + } + return StringRef(); // returning empty string means success + } + + static bool mustQuote(StringRef) { return false; } +}; + +template <> struct ScalarEnumerationTraits<FileKinds> { + static void enumeration(IO &io, FileKinds &value) { + io.enumCase(value, "object", fileKindObjectAtoms); + io.enumCase(value, "archive", fileKindArchive); + io.enumCase(value, "object-mach-o", fileKindObjectMachO); + } +}; + +template <> struct MappingTraits<ArchMember> { + static void mapping(IO &io, ArchMember &member) { + io.mapOptional("kind", member._kind, fileKindObjectAtoms); + io.mapOptional("name", member._name); + io.mapRequired("content", member._content); + } +}; + +// Declare that an AtomList is a yaml sequence. 
+template <typename T> struct SequenceTraits<AtomList<T> > { + static size_t size(IO &io, AtomList<T> &seq) { return seq._atoms.size(); } + static T *&element(IO &io, AtomList<T> &seq, size_t index) { + if (index >= seq._atoms.size()) + seq._atoms.resize(index + 1); + return seq._atoms[index].get(); + } +}; + +// Declare that an AtomRange is a yaml sequence. +template <typename T> struct SequenceTraits<File::AtomRange<T> > { + static size_t size(IO &io, File::AtomRange<T> &seq) { return seq.size(); } + static T *&element(IO &io, File::AtomRange<T> &seq, size_t index) { + assert(io.outputting() && "AtomRange only used when outputting"); + assert(index < seq.size() && "Out of range access"); + return seq[index].get(); + } +}; + +// Used to allow DefinedAtom content bytes to be a flow sequence of +// two-digit hex numbers without the leading 0x (e.g. FF, 04, 0A) +template <> struct ScalarTraits<ImplicitHex8> { + static void output(const ImplicitHex8 &val, void *, raw_ostream &out) { + uint8_t num = val; + out << llvm::format("%02X", num); + } + + static StringRef input(StringRef str, void *, ImplicitHex8 &val) { + unsigned long long n; + if (getAsUnsignedInteger(str, 16, n)) + return "invalid two-digit-hex number"; + if (n > 0xFF) + return "out of range two-digit-hex number"; + val = n; + return StringRef(); // returning empty string means success + } + + static bool mustQuote(StringRef) { return false; } +}; + +// YAML conversion for std::vector<const lld::File*> +template <> struct DocumentListTraits<std::vector<const lld::File *> > { + static size_t size(IO &io, std::vector<const lld::File *> &seq) { + return seq.size(); + } + static const lld::File *&element(IO &io, std::vector<const lld::File *> &seq, + size_t index) { + if (index >= seq.size()) + seq.resize(index + 1); + return seq[index]; + } +}; + +// YAML conversion for const lld::File* +template <> struct MappingTraits<const lld::File *> { + class NormArchiveFile : public lld::ArchiveLibraryFile { + public: 
// lld::File implementation used as the normalized (YAML-side) form of a
// file. It keeps two views of each atom category: an AtomList that owns atoms
// and an AtomRange that the File accessors return.
class NormalizedFile : public lld::File {
public:
  // Constructor used with only an IO object: no RefNameBuilder is created
  // and each range is bound to this object's own (initially empty) list.
  // The lists are declared before the ranges, so they are constructed first.
  NormalizedFile(IO &io)
    : File("", kindNormalizedObject), _io(io), _rnb(nullptr),
      _definedAtomsRef(_definedAtoms._atoms),
      _undefinedAtomsRef(_undefinedAtoms._atoms),
      _sharedLibraryAtomsRef(_sharedLibraryAtoms._atoms),
      _absoluteAtomsRef(_absoluteAtoms._atoms) {}

  // Constructor used with an existing file: builds a RefNameBuilder for
  // `file` and makes the ranges refer directly to that file's atoms; the
  // owned lists stay empty.
  NormalizedFile(IO &io, const lld::File *file)
    : File(file->path(), kindNormalizedObject), _io(io),
      _rnb(new RefNameBuilder(*file)), _path(file->path()),
      _definedAtomsRef(file->defined()),
      _undefinedAtomsRef(file->undefined()),
      _sharedLibraryAtomsRef(file->sharedLibrary()),
      _absoluteAtomsRef(file->absolute()) {
  }

  ~NormalizedFile() override {
  }

  // Declared here, defined outside this class body.
  const lld::File *denormalize(IO &io);

  // The atom accessors return the range views, not the owning lists.
  const AtomRange<lld::DefinedAtom> defined() const override {
    return _definedAtomsRef;
  }

  const AtomRange<lld::UndefinedAtom> undefined() const override {
    return _undefinedAtomsRef;
  }

  const AtomRange<lld::SharedLibraryAtom> sharedLibrary() const override {
    return _sharedLibraryAtomsRef;
  }

  const AtomRange<lld::AbsoluteAtom> absolute() const override {
    return _absoluteAtomsRef;
  }

  // Empties the owning lists; the range members are not reassigned here.
  void clearAtoms() override {
    _definedAtoms._atoms.clear();
    _undefinedAtoms._atoms.clear();
    _sharedLibraryAtoms._atoms.clear();
    _absoluteAtoms._atoms.clear();
  }

  // Allocate a new copy of this string in _storage, so the strings
  // can be freed when File is destroyed.
  // The copy is exactly str.size() bytes; no terminating NUL is appended.
  StringRef copyString(StringRef str) {
    char *s = _storage.Allocate<char>(str.size());
    memcpy(s, str.data(), str.size());
    return StringRef(s, str.size());
  }

  IO                                  &_io;
  // Null when constructed from an IO object alone.
  std::unique_ptr<RefNameBuilder>      _rnb;
  StringRef                            _path;
  // Owning lists.
  AtomList<lld::DefinedAtom>           _definedAtoms;
  AtomList<lld::UndefinedAtom>         _undefinedAtoms;
  AtomList<lld::SharedLibraryAtom>     _sharedLibraryAtoms;
  AtomList<lld::AbsoluteAtom>          _absoluteAtoms;
  // Views returned by the accessors; must stay declared after the lists
  // because the first constructor initializes them from the lists.
  AtomRange<lld::DefinedAtom>          _definedAtomsRef;
  AtomRange<lld::UndefinedAtom>        _undefinedAtomsRef;
  AtomRange<lld::SharedLibraryAtom>    _sharedLibraryAtomsRef;
  AtomRange<lld::AbsoluteAtom>         _absoluteAtomsRef;
  // Backing storage for copyString().
  llvm::BumpPtrAllocator               _storage;
};
+ if (io.mapTag("!native", true) || io.mapTag("tag:yaml.org,2002:map")) + mappingAtoms(io, file); + } + + static void mappingAtoms(IO &io, const lld::File *&file) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedFile, const lld::File *> + keys(io, file, nullptr); + assert(info != nullptr); + info->_file = keys.operator->(); + + io.mapOptional("path", keys->_path); + + if (io.outputting()) { + io.mapOptional("defined-atoms", keys->_definedAtomsRef); + io.mapOptional("undefined-atoms", keys->_undefinedAtomsRef); + io.mapOptional("shared-library-atoms", keys->_sharedLibraryAtomsRef); + io.mapOptional("absolute-atoms", keys->_absoluteAtomsRef); + } else { + io.mapOptional("defined-atoms", keys->_definedAtoms); + io.mapOptional("undefined-atoms", keys->_undefinedAtoms); + io.mapOptional("shared-library-atoms", keys->_sharedLibraryAtoms); + io.mapOptional("absolute-atoms", keys->_absoluteAtoms); + } + } + + static void mappingArchive(IO &io, const lld::File *&file) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormArchiveFile, const lld::File *> + keys(io, file, &info->_file->allocator()); + + io.mapOptional("path", keys->_path); + io.mapOptional("members", keys->_members); + } +}; + +// YAML conversion for const lld::Reference* +template <> struct MappingTraits<const lld::Reference *> { + class NormalizedReference : public lld::Reference { + public: + NormalizedReference(IO &io) + : lld::Reference(lld::Reference::KindNamespace::all, + lld::Reference::KindArch::all, 0), + _target(nullptr), _offset(0), _addend(0), _tag(0) {} + + NormalizedReference(IO &io, const lld::Reference *ref) + : lld::Reference(ref->kindNamespace(), ref->kindArch(), + ref->kindValue()), + _target(nullptr), _targetName(targetName(io, ref)), + _offset(ref->offsetInAtom()), _addend(ref->addend()), + _tag(ref->tag()) { + _mappedKind.ns = ref->kindNamespace(); + _mappedKind.arch = 
ref->kindArch(); + _mappedKind.value = ref->kindValue(); + } + + const lld::Reference *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_targetName.empty()) + _targetName = f->copyString(_targetName); + DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() + << "created Reference to name: '" + << _targetName << "' (" + << (const void *)_targetName.data() + << ", " << _targetName.size() << ")\n"); + setKindNamespace(_mappedKind.ns); + setKindArch(_mappedKind.arch); + setKindValue(_mappedKind.value); + return this; + } + + void bind(const RefNameResolver &); + static StringRef targetName(IO &io, const lld::Reference *ref); + + uint64_t offsetInAtom() const override { return _offset; } + const lld::Atom *target() const override { return _target; } + Addend addend() const override { return _addend; } + void setAddend(Addend a) override { _addend = a; } + void setTarget(const lld::Atom *a) override { _target = a; } + + const lld::Atom *_target; + StringRef _targetName; + uint32_t _offset; + Addend _addend; + RefKind _mappedKind; + uint32_t _tag; + }; + + static void mapping(IO &io, const lld::Reference *&ref) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedReference, const lld::Reference *> keys( + io, ref, &info->_file->allocator()); + + io.mapRequired("kind", keys->_mappedKind); + io.mapOptional("offset", keys->_offset); + io.mapOptional("target", keys->_targetName); + io.mapOptional("addend", keys->_addend, (lld::Reference::Addend)0); + io.mapOptional("tag", keys->_tag, 0u); + } +}; + +// YAML conversion for const lld::DefinedAtom* +template <> struct MappingTraits<const lld::DefinedAtom *> { + + class NormalizedAtom : public lld::DefinedAtom { + public: + NormalizedAtom(IO &io) + : 
// Normalizing constructor: snapshots every attribute of `atom` so YAML I/O
// can map plain members. References are kept as pointers to the original
// reference objects; content bytes are copied only when the atom occupies
// disk space.
//
// _ordinal is now initialized from the source atom. Previously this
// constructor left it uninitialized, so ordinal() on an atom built this way
// read an indeterminate value.
NormalizedAtom(IO &io, const lld::DefinedAtom *atom)
    : _file(fileFromContext(io)), _name(atom->name()),
      _scope(atom->scope()), _interpose(atom->interposable()),
      _merge(atom->merge()), _contentType(atom->contentType()),
      _alignment(atom->alignment()), _sectionChoice(atom->sectionChoice()),
      _deadStrip(atom->deadStrip()), _dynamicExport(atom->dynamicExport()),
      _codeModel(atom->codeModel()),
      _permissions(atom->permissions()),
      // Listed in member-declaration order (_ordinal follows _permissions).
      _ordinal(static_cast<uint32_t>(atom->ordinal())),
      _size(atom->size()),
      _sectionName(atom->customSectionName()),
      _sectionSize(atom->sectionSize()) {
  for (const lld::Reference *r : *atom)
    _references.push_back(r);
  // Atoms that take no space on disk keep an empty content vector.
  if (!atom->occupiesDiskSpace())
    return;
  ArrayRef<uint8_t> cont = atom->rawContent();
  _content.reserve(cont.size());
  for (uint8_t x : cont)
    _content.push_back(x);
}
return _file; } + StringRef name() const override { return _name; } + uint64_t size() const override { return _size; } + Scope scope() const override { return _scope; } + Interposable interposable() const override { return _interpose; } + Merge merge() const override { return _merge; } + ContentType contentType() const override { return _contentType; } + Alignment alignment() const override { return _alignment; } + SectionChoice sectionChoice() const override { return _sectionChoice; } + StringRef customSectionName() const override { return _sectionName; } + uint64_t sectionSize() const override { return _sectionSize; } + DeadStripKind deadStrip() const override { return _deadStrip; } + DynamicExport dynamicExport() const override { return _dynamicExport; } + CodeModel codeModel() const override { return _codeModel; } + ContentPermissions permissions() const override { return _permissions; } + ArrayRef<uint8_t> rawContent() const override { + if (!occupiesDiskSpace()) + return ArrayRef<uint8_t>(); + return ArrayRef<uint8_t>( + reinterpret_cast<const uint8_t *>(_content.data()), _content.size()); + } + + uint64_t ordinal() const override { return _ordinal; } + + reference_iterator begin() const override { + uintptr_t index = 0; + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + reference_iterator end() const override { + uintptr_t index = _references.size(); + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + const lld::Reference *derefIterator(const void *it) const override { + uintptr_t index = reinterpret_cast<uintptr_t>(it); + assert(index < _references.size()); + return _references[index]; + } + void incrementIterator(const void *&it) const override { + uintptr_t index = reinterpret_cast<uintptr_t>(it); + ++index; + it = reinterpret_cast<const void *>(index); + } + + void addReference(Reference::KindNamespace ns, + Reference::KindArch arch, + 
Reference::KindValue kindValue, uint64_t off, + const Atom *target, Reference::Addend a) override { + assert(target && "trying to create reference to nothing"); + auto node = new (file().allocator()) SimpleReference(ns, arch, kindValue, + off, target, a); + _references.push_back(node); + } + + const lld::File &_file; + StringRef _name; + StringRef _refName; + Scope _scope; + Interposable _interpose; + Merge _merge; + ContentType _contentType; + Alignment _alignment; + SectionChoice _sectionChoice; + DeadStripKind _deadStrip; + DynamicExport _dynamicExport; + CodeModel _codeModel; + ContentPermissions _permissions; + uint32_t _ordinal; + std::vector<ImplicitHex8> _content; + uint64_t _size; + StringRef _sectionName; + uint64_t _sectionSize; + std::vector<const lld::Reference *> _references; + }; + + static void mapping(IO &io, const lld::DefinedAtom *&atom) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedAtom, const lld::DefinedAtom *> keys( + io, atom, &info->_file->allocator()); + if (io.outputting()) { + // If writing YAML, check if atom needs a ref-name. 
+ typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + assert(info != nullptr); + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + assert(f); + assert(f->_rnb); + if (f->_rnb->hasRefName(atom)) { + keys->_refName = f->_rnb->refName(atom); + } + } + + io.mapOptional("name", keys->_name, StringRef()); + io.mapOptional("ref-name", keys->_refName, StringRef()); + io.mapOptional("scope", keys->_scope, + DefinedAtom::scopeTranslationUnit); + io.mapOptional("type", keys->_contentType, + DefinedAtom::typeCode); + io.mapOptional("content", keys->_content); + io.mapOptional("size", keys->_size, (uint64_t)keys->_content.size()); + io.mapOptional("interposable", keys->_interpose, + DefinedAtom::interposeNo); + io.mapOptional("merge", keys->_merge, DefinedAtom::mergeNo); + io.mapOptional("alignment", keys->_alignment, + DefinedAtom::Alignment(1)); + io.mapOptional("section-choice", keys->_sectionChoice, + DefinedAtom::sectionBasedOnContent); + io.mapOptional("section-name", keys->_sectionName, StringRef()); + io.mapOptional("section-size", keys->_sectionSize, (uint64_t)0); + io.mapOptional("dead-strip", keys->_deadStrip, + DefinedAtom::deadStripNormal); + io.mapOptional("dynamic-export", keys->_dynamicExport, + DefinedAtom::dynamicExportNormal); + io.mapOptional("code-model", keys->_codeModel, DefinedAtom::codeNA); + // default permissions based on content type + io.mapOptional("permissions", keys->_permissions, + DefinedAtom::permissions( + keys->_contentType)); + io.mapOptional("references", keys->_references); + } +}; + +template <> struct MappingTraits<lld::DefinedAtom *> { + static void mapping(IO &io, lld::DefinedAtom *&atom) { + const lld::DefinedAtom *atomPtr = atom; + MappingTraits<const lld::DefinedAtom *>::mapping(io, atomPtr); + atom = const_cast<lld::DefinedAtom *>(atomPtr); + } +}; + +// YAML conversion for const lld::UndefinedAtom* +template <> struct MappingTraits<const lld::UndefinedAtom *> { + class NormalizedAtom : 
public lld::UndefinedAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _canBeNull(canBeNullNever) {} + + NormalizedAtom(IO &io, const lld::UndefinedAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _canBeNull(atom->canBeNull()) {} + + ~NormalizedAtom() override = default; + + const lld::UndefinedAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created UndefinedAtom named: '" << _name + << "' (" << (const void *)_name.data() << ", " + << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + CanBeNull canBeNull() const override { return _canBeNull; } + + const lld::File &_file; + StringRef _name; + CanBeNull _canBeNull; + }; + + static void mapping(IO &io, const lld::UndefinedAtom *&atom) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedAtom, const lld::UndefinedAtom *> keys( + io, atom, &info->_file->allocator()); + + io.mapRequired("name", keys->_name); + io.mapOptional("can-be-null", keys->_canBeNull, + lld::UndefinedAtom::canBeNullNever); + } +}; + +template <> struct MappingTraits<lld::UndefinedAtom *> { + static void mapping(IO &io, lld::UndefinedAtom *&atom) { + const lld::UndefinedAtom *atomPtr = atom; + MappingTraits<const lld::UndefinedAtom *>::mapping(io, 
atomPtr); + atom = const_cast<lld::UndefinedAtom *>(atomPtr); + } +}; + +// YAML conversion for const lld::SharedLibraryAtom* +template <> struct MappingTraits<const lld::SharedLibraryAtom *> { + class NormalizedAtom : public lld::SharedLibraryAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _canBeNull(false), + _type(Type::Unknown), _size(0) {} + + NormalizedAtom(IO &io, const lld::SharedLibraryAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _loadName(atom->loadName()), _canBeNull(atom->canBeNullAtRuntime()), + _type(atom->type()), _size(atom->size()) {} + + ~NormalizedAtom() override = default; + + const lld::SharedLibraryAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + if (!_loadName.empty()) + _loadName = f->copyString(_loadName); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created SharedLibraryAtom named: '" + << _name << "' (" + << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + StringRef loadName() const override { return _loadName; } + bool canBeNullAtRuntime() const override { return _canBeNull; } + Type type() const override { return _type; } + uint64_t size() const override { return _size; } + + const lld::File &_file; + StringRef _name; + StringRef _loadName; + ShlibCanBeNull _canBeNull; + Type _type; + uint64_t 
_size; + }; + + static void mapping(IO &io, const lld::SharedLibraryAtom *&atom) { + + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedAtom, const lld::SharedLibraryAtom *> + keys(io, atom, &info->_file->allocator()); + + io.mapRequired("name", keys->_name); + io.mapOptional("load-name", keys->_loadName); + io.mapOptional("can-be-null", keys->_canBeNull, (ShlibCanBeNull) false); + io.mapOptional("type", keys->_type, SharedLibraryAtom::Type::Code); + io.mapOptional("size", keys->_size, uint64_t(0)); + } +}; + +template <> struct MappingTraits<lld::SharedLibraryAtom *> { + static void mapping(IO &io, lld::SharedLibraryAtom *&atom) { + const lld::SharedLibraryAtom *atomPtr = atom; + MappingTraits<const lld::SharedLibraryAtom *>::mapping(io, atomPtr); + atom = const_cast<lld::SharedLibraryAtom *>(atomPtr); + } +}; + +// YAML conversion for const lld::AbsoluteAtom* +template <> struct MappingTraits<const lld::AbsoluteAtom *> { + class NormalizedAtom : public lld::AbsoluteAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _scope(), _value(0) {} + + NormalizedAtom(IO &io, const lld::AbsoluteAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _scope(atom->scope()), _value(atom->value()) {} + + ~NormalizedAtom() override = default; + + const lld::AbsoluteAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created AbsoluteAtom named: '" << _name + << "' (" << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + 
YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + uint64_t value() const override { return _value; } + Scope scope() const override { return _scope; } + + const lld::File &_file; + StringRef _name; + StringRef _refName; + Scope _scope; + Hex64 _value; + }; + + static void mapping(IO &io, const lld::AbsoluteAtom *&atom) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedAtom, const lld::AbsoluteAtom *> keys( + io, atom, &info->_file->allocator()); + + if (io.outputting()) { + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + assert(f); + assert(f->_rnb); + if (f->_rnb->hasRefName(atom)) { + keys->_refName = f->_rnb->refName(atom); + } + } + + io.mapRequired("name", keys->_name); + io.mapOptional("ref-name", keys->_refName, StringRef()); + io.mapOptional("scope", keys->_scope); + io.mapRequired("value", keys->_value); + } +}; + +template <> struct MappingTraits<lld::AbsoluteAtom *> { + static void mapping(IO &io, lld::AbsoluteAtom *&atom) { + const lld::AbsoluteAtom *atomPtr = atom; + MappingTraits<const lld::AbsoluteAtom *>::mapping(io, atomPtr); + atom = const_cast<lld::AbsoluteAtom *>(atomPtr); + } +}; + +} // end namespace llvm +} // end namespace yaml + +RefNameResolver::RefNameResolver(const lld::File *file, IO &io) : _io(io) { + typedef MappingTraits<const lld::DefinedAtom *>::NormalizedAtom + NormalizedAtom; + for (const lld::DefinedAtom *a : file->defined()) { + const auto *na = (const NormalizedAtom *)a; + if (!na->_refName.empty()) + add(na->_refName, a); + else if (!na->_name.empty()) + 
add(na->_name, a); + } + + for (const lld::UndefinedAtom *a : file->undefined()) + add(a->name(), a); + + for (const lld::SharedLibraryAtom *a : file->sharedLibrary()) + add(a->name(), a); + + typedef MappingTraits<const lld::AbsoluteAtom *>::NormalizedAtom NormAbsAtom; + for (const lld::AbsoluteAtom *a : file->absolute()) { + const auto *na = (const NormAbsAtom *)a; + if (na->_refName.empty()) + add(na->_name, a); + else + add(na->_refName, a); + } +} + +inline const lld::File * +MappingTraits<const lld::File *>::NormalizedFile::denormalize(IO &io) { + typedef MappingTraits<const lld::DefinedAtom *>::NormalizedAtom + NormalizedAtom; + + RefNameResolver nameResolver(this, io); + // Now that all atoms are parsed, references can be bound. + for (const lld::DefinedAtom *a : this->defined()) { + auto *normAtom = (NormalizedAtom *)const_cast<DefinedAtom *>(a); + normAtom->bind(nameResolver); + } + + return this; +} + +inline void MappingTraits<const lld::DefinedAtom *>::NormalizedAtom::bind( + const RefNameResolver &resolver) { + typedef MappingTraits<const lld::Reference *>::NormalizedReference + NormalizedReference; + for (const lld::Reference *ref : _references) { + auto *normRef = (NormalizedReference *)const_cast<Reference *>(ref); + normRef->bind(resolver); + } +} + +inline void MappingTraits<const lld::Reference *>::NormalizedReference::bind( + const RefNameResolver &resolver) { + _target = resolver.lookup(_targetName); +} + +inline StringRef +MappingTraits<const lld::Reference *>::NormalizedReference::targetName( + IO &io, const lld::Reference *ref) { + if (ref->target() == nullptr) + return StringRef(); + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + RefNameBuilder &rnb = *f->_rnb; + if (rnb.hasRefName(ref->target())) + return rnb.refName(ref->target()); + return 
ref->target()->name(); +} + +namespace lld { +namespace yaml { + +class Writer : public lld::Writer { +public: + Writer(const LinkingContext &context) : _ctx(context) {} + + llvm::Error writeFile(const lld::File &file, StringRef outPath) override { + // Create stream to path. + std::error_code ec; + llvm::raw_fd_ostream out(outPath, ec, llvm::sys::fs::F_Text); + if (ec) + return llvm::errorCodeToError(ec); + + // Create yaml Output writer, using yaml options for context. + YamlContext yamlContext; + yamlContext._ctx = &_ctx; + yamlContext._registry = &_ctx.registry(); + llvm::yaml::Output yout(out, &yamlContext); + + // Write yaml output. + const lld::File *fileRef = &file; + yout << fileRef; + + return llvm::Error::success(); + } + +private: + const LinkingContext &_ctx; +}; + +} // end namespace yaml + +namespace { + +/// Handles !native tagged yaml documents. +class NativeYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { + if (io.mapTag("!native")) { + MappingTraits<const lld::File *>::mappingAtoms(io, file); + return true; + } + return false; + } +}; + +/// Handles !archive tagged yaml documents. +class ArchiveYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { + if (io.mapTag("!archive")) { + MappingTraits<const lld::File *>::mappingArchive(io, file); + return true; + } + return false; + } +}; + +class YAMLReader : public Reader { +public: + YAMLReader(const Registry ®istry) : _registry(registry) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + StringRef name = mb.getBufferIdentifier(); + return name.endswith(".objtxt") || name.endswith(".yaml"); + } + + ErrorOr<std::unique_ptr<File>> + loadFile(std::unique_ptr<MemoryBuffer> mb, + const class Registry &) const override { + // Create YAML Input Reader. 
+ YamlContext yamlContext; + yamlContext._registry = &_registry; + yamlContext._path = mb->getBufferIdentifier(); + llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); + + // Fill vector with File objects created by parsing yaml. + std::vector<const lld::File *> createdFiles; + yin >> createdFiles; + assert(createdFiles.size() == 1); + + // Error out now if there were parsing errors. + if (yin.error()) + return make_error_code(lld::YamlReaderError::illegal_value); + + std::shared_ptr<MemoryBuffer> smb(mb.release()); + const File *file = createdFiles[0]; + // Note: loadFile() should return vector of *const* File + File *f = const_cast<File *>(file); + f->setLastError(std::error_code()); + f->setSharedMemoryBuffer(smb); + return std::unique_ptr<File>(f); + } + +private: + const Registry &_registry; +}; + +} // end anonymous namespace + +void Registry::addSupportYamlFiles() { + add(std::unique_ptr<Reader>(new YAMLReader(*this))); + add(std::unique_ptr<YamlIOTaggedDocumentHandler>( + new NativeYamlIOTaggedDocumentHandler())); + add(std::unique_ptr<YamlIOTaggedDocumentHandler>( + new ArchiveYamlIOTaggedDocumentHandler())); +} + +std::unique_ptr<Writer> createWriterYAML(const LinkingContext &context) { + return std::unique_ptr<Writer>(new lld::yaml::Writer(context)); +} + +} // end namespace lld diff --git a/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt b/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt new file mode 100644 index 000000000000..2df10697ff66 --- /dev/null +++ b/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt @@ -0,0 +1,24 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_lld_tool(lld + lld.cpp + ) + +target_link_libraries(lld + lldDriver + lldCOFF + lldELF + ) + +install(TARGETS lld + RUNTIME DESTINATION bin) + +if(NOT LLD_SYMLINKS_TO_CREATE) + set(LLD_SYMLINKS_TO_CREATE lld-link ld.lld) +endif() + +foreach(link ${LLD_SYMLINKS_TO_CREATE}) + add_lld_symlink(${link} lld) +endforeach() diff --git a/contrib/llvm/tools/lld/tools/lld/lld.cpp 
b/contrib/llvm/tools/lld/tools/lld/lld.cpp new file mode 100644 index 000000000000..f42ccfe3d36a --- /dev/null +++ b/contrib/llvm/tools/lld/tools/lld/lld.cpp @@ -0,0 +1,103 @@ +//===- tools/lld/lld.cpp - Linker Driver Dispatcher -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the entry point to the lld driver. This is a thin wrapper which +// dispatches to the given platform specific driver. +// +// If there is -flavor option, it is dispatched according to the arguments. +// If the flavor parameter is not present, then it is dispatched according +// to argv[0]. +// +//===----------------------------------------------------------------------===// + +#include "lld/Driver/Driver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" + +using namespace lld; +using namespace llvm; +using namespace llvm::sys; + +enum Flavor { + Invalid, + Gnu, // -flavor gnu + WinLink, // -flavor link + Darwin, // -flavor darwin +}; + +LLVM_ATTRIBUTE_NORETURN static void die(const Twine &S) { + errs() << S << "\n"; + exit(1); +} + +static Flavor getFlavor(StringRef S) { + return StringSwitch<Flavor>(S) + .Cases("ld", "ld.lld", "gnu", Gnu) + .Case("link", WinLink) + .Case("darwin", Darwin) + .Default(Invalid); +} + +static Flavor parseProgname(StringRef Progname) { +#if __APPLE__ + // Use Darwin driver for "ld" on Darwin. + if (Progname == "ld") + return Darwin; +#endif + +#if LLVM_ON_UNIX + // Use GNU driver for "ld" on other Unix-like system. + if (Progname == "ld") + return Gnu; +#endif + + // Progname may be something like "lld-gnu". Parse it. 
+ SmallVector<StringRef, 3> V; + Progname.split(V, "-"); + for (StringRef S : V) + if (Flavor F = getFlavor(S)) + return F; + return Invalid; +} + +static Flavor parseFlavor(std::vector<const char *> &V) { + // Parse -flavor option. + if (V.size() > 1 && V[1] == StringRef("-flavor")) { + if (V.size() <= 2) + die("missing arg value for '-flavor'"); + Flavor F = getFlavor(V[2]); + if (F == Invalid) + die("Unknown flavor: " + StringRef(V[2])); + V.erase(V.begin() + 1, V.begin() + 3); + return F; + } + + // Deduct the flavor from argv[0]. + StringRef Arg0 = path::filename(V[0]); + if (Arg0.endswith_lower(".exe")) + Arg0 = Arg0.drop_back(4); + return parseProgname(Arg0); +} + +/// Universal linker main(). This linker emulates the gnu, darwin, or +/// windows linker based on the argv[0] or -flavor option. +int main(int Argc, const char **Argv) { + // Standard set up, so program fails gracefully. + sys::PrintStackTraceOnErrorSignal(Argv[0]); + PrettyStackTraceProgram StackPrinter(Argc, Argv); + llvm_shutdown_obj Shutdown; + + std::vector<const char *> Args(Argv, Argv + Argc); + return !elf::link(Args, true); +} |
