diff options
Diffstat (limited to 'contrib/llvm/tools/lld/ELF')
47 files changed, 19984 insertions, 0 deletions
diff --git a/contrib/llvm/tools/lld/ELF/CMakeLists.txt b/contrib/llvm/tools/lld/ELF/CMakeLists.txt
new file mode 100644
index 000000000000..2e9d2b941fd9
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/CMakeLists.txt
@@ -0,0 +1,61 @@
+# Generate Options.inc from Options.td with tablegen's option-parser backend
+# and expose the generation step as the ELFOptionsTableGen target.
+set(LLVM_TARGET_DEFINITIONS Options.td)
+tablegen(LLVM Options.inc -gen-opt-parser-defs)
+add_public_tablegen_target(ELFOptionsTableGen)
+
+# Unless lld is built standalone, also depend on intrinsics_gen
+# (appended to DEPENDS below through ${tablegen_deps}).
+if(NOT LLD_BUILT_STANDALONE)
+  set(tablegen_deps intrinsics_gen)
+endif()
+
+# The ELF linker library: sources, LLVM components, extra libraries and
+# the targets that must be built first.
+add_lld_library(lldELF
+  Driver.cpp
+  DriverUtils.cpp
+  EhFrame.cpp
+  Error.cpp
+  GdbIndex.cpp
+  ICF.cpp
+  InputFiles.cpp
+  InputSection.cpp
+  LTO.cpp
+  LinkerScript.cpp
+  MarkLive.cpp
+  Mips.cpp
+  OutputSections.cpp
+  Relocations.cpp
+  ScriptParser.cpp
+  Strings.cpp
+  SymbolTable.cpp
+  Symbols.cpp
+  SyntheticSections.cpp
+  Target.cpp
+  Thunks.cpp
+  Writer.cpp
+
+  LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  Analysis
+  BitReader
+  BitWriter
+  Codegen
+  Core
+  DebugInfoDWARF
+  Demangle
+  IPO
+  Linker
+  LTO
+  Object
+  Option
+  Passes
+  MC
+  Support
+  Target
+  TransformUtils
+
+  LINK_LIBS
+  lldConfig
+  lldCore
+  ${PTHREAD_LIB}
+
+  DEPENDS
+  ELFOptionsTableGen
+  ${tablegen_deps}
+  )
diff --git a/contrib/llvm/tools/lld/ELF/Config.h b/contrib/llvm/tools/lld/ELF/Config.h
new file mode 100644
index 000000000000..b828cdb25047
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Config.h
@@ -0,0 +1,167 @@
+//===- Config.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_CONFIG_H
+#define LLD_ELF_CONFIG_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/ELF.h"
+
+#include <vector>
+
+namespace lld {
+namespace elf {
+
+class InputFile;
+struct Symbol;
+
+// ELF class (32/64-bit) and byte order (little/big endian) of the output.
+// ELFNoneKind means "not determined yet".
+enum ELFKind {
+  ELFNoneKind,
+  ELF32LEKind,
+  ELF32BEKind,
+  ELF64LEKind,
+  ELF64BEKind
+};
+
+// For --build-id.
+enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid };
+
+// For --discard-{all,locals,none} and --retain-symbols-file.
+enum class DiscardPolicy { Default, All, Locals, RetainFile, None };
+
+// For --strip-{all,debug}.
+enum class StripPolicy { None, All, Debug };
+
+// For --unresolved-symbols.
+enum class UnresolvedPolicy { NoUndef, ReportError, Warn, Ignore };
+
+// For --sort-section and linkerscript sorting rules.
+enum class SortSectionPolicy { Default, None, Alignment, Name, Priority };
+
+// For --target2
+enum class Target2Policy { Abs, Rel, GotRel };
+
+// One symbol name pattern with its matching flags. This is deliberately
+// kept a plain aggregate: the driver builds it with brace initialization
+// ({Name, IsExternCpp, HasWildcard}).
+struct SymbolVersion {
+  llvm::StringRef Name;
+  bool IsExternCpp;
+  bool HasWildcard;
+};
+
+// This struct holds one symbol version definition, as found in a
+// version script when one is used for the link.
+struct VersionDefinition {
+  VersionDefinition(llvm::StringRef Name, uint16_t Id) : Name(Name), Id(Id) {}
+  llvm::StringRef Name;
+  uint16_t Id;
+  std::vector<SymbolVersion> Globals;
+  // Offset in string table. The constructor does not set it, so give it a
+  // defined value; otherwise reading it before it is assigned would read
+  // an indeterminate value.
+  size_t NameOff = 0;
+};
+
+// This struct contains the global configuration for the linker.
+// Most fields are direct mapping from the command line options
+// and such fields have the same name as the corresponding options.
+// Most fields are initialized by the driver.
+struct Configuration {
+  InputFile *FirstElf = nullptr;
+  uint8_t OSABI = 0;
+  // Section name -> start address, collected from --section-start, -Ttext,
+  // -Tdata and -Tbss.
+  llvm::StringMap<uint64_t> SectionStartMap;
+  llvm::StringRef DynamicLinker;
+  llvm::StringRef Entry;
+  llvm::StringRef Emulation;
+  llvm::StringRef Fini;
+  llvm::StringRef Init;
+  llvm::StringRef LTOAAPipeline;
+  llvm::StringRef LTONewPmPasses;
+  llvm::StringRef OutputFile;
+  llvm::StringRef SoName;
+  llvm::StringRef Sysroot;
+  // Names listed in the --retain-symbols-file file.
+  llvm::StringSet<> RetainSymbolsFile;
+  // All -rpath values joined with ':'.
+  std::string RPath;
+  std::vector<VersionDefinition> VersionDefinitions;
+  std::vector<llvm::StringRef> AuxiliaryList;
+  std::vector<llvm::StringRef> SearchPaths;
+  std::vector<llvm::StringRef> SymbolOrderingFile;
+  std::vector<llvm::StringRef> Undefined;
+  std::vector<SymbolVersion> VersionScriptGlobals;
+  std::vector<SymbolVersion> VersionScriptLocals;
+  // Raw bytes given by --build-id=0x<hexstring>.
+  std::vector<uint8_t> BuildIdVector;
+  // Boolean switches. The ones without a default member initializer are
+  // assigned by the driver in Driver.cpp (readConfigs(), main() or link()).
+  bool AllowMultipleDefinition;
+  bool AsNeeded = false;
+  bool Bsymbolic;
+  bool BsymbolicFunctions;
+  bool ColorDiagnostics = false;
+  bool Demangle = true;
+  bool DisableVerify;
+  bool EhFrameHdr;
+  bool EnableNewDtags;
+  bool ExportDynamic;
+  bool FatalWarnings;
+  bool GcSections;
+  bool GdbIndex;
+  bool GnuHash = false;
+  bool ICF;
+  bool Mips64EL = false;
+  bool MipsN32Abi = false;
+  bool NoGnuUnique;
+  bool NoUndefinedVersion;
+  bool Nostdlib;
+  bool OFormatBinary;
+  bool OMagic;
+  // True if we are generating position-independent code (Pie || Shared).
+  bool Pic;
+  bool Pie;
+  bool PrintGcSections;
+  bool Rela;
+  bool Relocatable;
+  bool SaveTemps;
+  bool SingleRoRx;
+  bool Shared;
+  bool Static = false;
+  bool SysvHash = true;
+  bool Target1Rel;
+  bool Threads;
+  bool Trace;
+  bool Verbose;
+  bool WarnCommon;
+  bool WarnMissingEntry;
+  // The Z* flags mirror "-z <keyword>" options.
+  bool ZCombreloc;
+  bool ZExecstack;
+  bool ZNodelete;
+  bool ZNow;
+  bool ZOrigin;
+  bool ZRelro;
+  // Set when the caller allows an early exit and --full-shutdown is absent.
+  bool ExitEarly;
+  bool ZWxneeded;
+  DiscardPolicy Discard;
+  SortSectionPolicy SortSection;
+  StripPolicy Strip = StripPolicy::None;
+  UnresolvedPolicy UnresolvedSymbols;
+  Target2Policy Target2 = Target2Policy::GotRel;
+  BuildIdKind BuildId = BuildIdKind::None;
+  // Output kind and machine; taken from -m or inferred from the first
+  // input file that has a known kind.
+  ELFKind EKind = ELFNoneKind;
+  uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL;
+  uint16_t EMachine = llvm::ELF::EM_NONE;
+  uint64_t ErrorLimit = 20;
+  uint64_t ImageBase;
+  uint64_t MaxPageSize;
+  uint64_t ZStackSize;
+  unsigned LTOPartitions;
+  unsigned LTOO;
+  unsigned Optimize;
+  unsigned ThinLTOJobs;
+};
+
+// The only instance of Configuration struct.
+extern Configuration *Config;
+
+} // namespace elf
+} // namespace lld
+
+#endif
diff --git a/contrib/llvm/tools/lld/ELF/Driver.cpp b/contrib/llvm/tools/lld/ELF/Driver.cpp
new file mode 100644
index 000000000000..c8ea821ec522
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Driver.cpp
@@ -0,0 +1,826 @@
+//===- Driver.cpp ---------------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Driver.h"
+#include "Config.h"
+#include "Error.h"
+#include "ICF.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "LinkerScript.h"
+#include "Memory.h"
+#include "Strings.h"
+#include "SymbolTable.h"
+#include "Target.h"
+#include "Threads.h"
+#include "Writer.h"
+#include "lld/Config/Version.h"
+#include "lld/Driver/Driver.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/TarWriter.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+#include <utility>
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::sys;
+
+using namespace lld;
+using namespace lld::elf;
+
+// Global state of the ELF linker. Config and Driver are assigned afresh by
+// every call to elf::link().
+Configuration *elf::Config;
+LinkerDriver *elf::Driver;
+
+// Definitions of the allocator objects declared in Memory.h.
+BumpPtrAllocator elf::BAlloc;
+StringSaver elf::Saver{BAlloc};
+std::vector<SpecificAllocBase *> elf::SpecificAllocBase::Instances;
+
+// Entry point of the ELF linker. Resets the global error/reproduce state,
+// creates fresh Config, Driver and ScriptConfig objects, runs the driver,
+// then releases the arena. Returns true if no error was reported.
+bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
+               raw_ostream &Error) {
+  // Reset per-invocation globals.
+  Tar = nullptr;
+  Argv0 = Args[0];
+  ErrorOS = &Error;
+  ErrorCount = 0;
+
+  Config = make<Configuration>();
+  Driver = make<LinkerDriver>();
+  ScriptConfig = make<ScriptConfiguration>();
+
+  Driver->main(Args, CanExitEarly);
+  freeArena();
+  return ErrorCount == 0;
+}
+
+// Parses the argument of a linker -m option into (ELF kind, machine, OS ABI).
+// A trailing "_fbsd" selects the FreeBSD OS ABI and is stripped before the
+// name is looked up. Unknown names are reported with error() and yield
+// ELFNoneKind / EM_NONE.
+static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) {
+  using KindAndMachine = std::pair<ELFKind, uint16_t>;
+
+  StringRef Name = Emul;
+  uint8_t OSABI = 0;
+  if (Name.endswith("_fbsd")) {
+    OSABI = ELFOSABI_FREEBSD;
+    Name = Name.drop_back(5);
+  }
+
+  KindAndMachine Parsed =
+      StringSwitch<KindAndMachine>(Name)
+          .Cases("aarch64elf", "aarch64linux", {ELF64LEKind, EM_AARCH64})
+          .Case("armelf_linux_eabi", {ELF32LEKind, EM_ARM})
+          .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
+          .Case("elf32btsmip", {ELF32BEKind, EM_MIPS})
+          .Case("elf32ltsmip", {ELF32LEKind, EM_MIPS})
+          .Case("elf32btsmipn32", {ELF32BEKind, EM_MIPS})
+          .Case("elf32ltsmipn32", {ELF32LEKind, EM_MIPS})
+          .Case("elf32ppc", {ELF32BEKind, EM_PPC})
+          .Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
+          .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
+          .Case("elf64ppc", {ELF64BEKind, EM_PPC64})
+          .Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64})
+          .Case("elf_i386", {ELF32LEKind, EM_386})
+          .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU})
+          .Default({ELFNoneKind, EM_NONE});
+
+  // Recognized emulation: nothing to report.
+  if (Parsed.first != ELFNoneKind)
+    return std::make_tuple(Parsed.first, Parsed.second, OSABI);
+
+  // Give a more helpful message for the PE emulation names.
+  bool IsWindowsTarget =
+      Name == "i386pe" || Name == "i386pep" || Name == "thumb2pe";
+  if (IsWindowsTarget)
+    error("Windows targets are not supported on the ELF frontend: " + Emul);
+  else
+    error("unknown emulation: " + Emul);
+  return std::make_tuple(Parsed.first, Parsed.second, OSABI);
+}
+
+// Returns slices of MB by parsing MB as an archive file.
+// Each slice consists of a member file in the archive.
+std::vector<MemoryBufferRef>
+LinkerDriver::getArchiveMembers(MemoryBufferRef MB) {
+  // Used as the prefix of every diagnostic below.
+  StringRef ArchiveName = MB.getBufferIdentifier();
+
+  std::unique_ptr<Archive> File =
+      check(Archive::create(MB), ArchiveName + ": failed to parse archive");
+
+  std::vector<MemoryBufferRef> Members;
+  Error Err = Error::success();
+  for (const ErrorOr<Archive::Child> &ChildOrErr : File->children(Err)) {
+    Archive::Child Child =
+        check(ChildOrErr,
+              ArchiveName + ": could not get the child of the archive");
+    Members.push_back(check(
+        Child.getMemoryBufferRef(),
+        ArchiveName +
+            ": could not get the buffer for a child of the archive"));
+  }
+  // Iteration reports its own failure through Err once the loop has ended.
+  if (Err)
+    fatal(ArchiveName + ": Archive::children failed: " +
+          toString(std::move(Err)));
+
+  // Take ownership of memory buffers created for members of thin archives.
+  for (std::unique_ptr<MemoryBuffer> &ThinBuf : File->takeThinBuffers())
+    make<std::unique_ptr<MemoryBuffer>>(std::move(ThinBuf));
+
+  return Members;
+}
+
+// Opens and parses a file. Path has to be resolved already.
+// Newly created memory buffers are owned by this driver.
+void LinkerDriver::addFile(StringRef Path) {
+  using namespace sys::fs;
+
+  Optional<MemoryBufferRef> Buffer = readFile(Path);
+  if (!Buffer)
+    return;
+  MemoryBufferRef MBRef = *Buffer;
+
+  // Under -format=binary every input is taken as raw data.
+  if (InBinary) {
+    Files.push_back(make<BinaryFile>(MBRef));
+    return;
+  }
+
+  switch (identify_magic(MBRef.getBuffer())) {
+  case file_magic::unknown:
+    // Anything we cannot identify is treated as a linker script.
+    readLinkerScript(MBRef);
+    return;
+  case file_magic::archive:
+    if (!InWholeArchive) {
+      Files.push_back(make<ArchiveFile>(MBRef));
+      return;
+    }
+    // --whole-archive: add every member as an object file.
+    for (MemoryBufferRef Member : getArchiveMembers(MBRef))
+      Files.push_back(createObjectFile(Member, Path));
+    return;
+  case file_magic::elf_shared_object:
+    if (Config->Relocatable) {
+      error("attempted static link of dynamic object " + Path);
+      return;
+    }
+    Files.push_back(createSharedFile(MBRef));
+    return;
+  default:
+    // Between --start-lib and --end-lib, objects are added lazily.
+    if (InLib)
+      Files.push_back(make<LazyObjectFile>(MBRef));
+    else
+      Files.push_back(createObjectFile(MBRef));
+  }
+}
+
+// Add a given library by searching it from input search paths.
+void LinkerDriver::addLibrary(StringRef Name) {
+  Optional<std::string> Path = searchLibrary(Name);
+  if (!Path) {
+    error("unable to find library -l" + Name);
+    return;
+  }
+  addFile(*Path);
+}
+
+// This function is called on startup. We need this for LTO since
+// LTO calls LLVM functions to compile bitcode files to native code.
+// Technically this can be delayed until we read bitcode files, but
+// we don't bother to do lazily because the initialization is fast.
+static void initLLVM(opt::InputArgList &Args) {
+  InitializeAllTargets();
+  InitializeAllTargetMCs();
+  InitializeAllAsmPrinters();
+  InitializeAllAsmParsers();
+
+  // Parse and evaluate -mllvm options. The first element plays the role
+  // of the program name.
+  std::vector<const char *> LLVMArgs;
+  LLVMArgs.push_back("lld (LLVM option parsing)");
+  for (auto *Arg : Args.filtered(OPT_mllvm))
+    LLVMArgs.push_back(Arg->getValue());
+  cl::ParseCommandLineOptions(LLVMArgs.size(), LLVMArgs.data());
+}
+
+// Some command line options or some combinations of them are not allowed.
+// This function checks for such errors.
+static void checkOptions(opt::InputArgList &Args) {
+  // The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
+  // table which is a relatively new feature.
+  if (Config->EMachine == EM_MIPS && Config->GnuHash)
+    error("the .gnu.hash section is not compatible with the MIPS target.");
+
+  if (Config->Pie && Config->Shared)
+    error("-shared and -pie may not be used together");
+
+  if (Config->Relocatable) {
+    if (Config->Shared)
+      error("-r and -shared may not be used together");
+    if (Config->GcSections)
+      error("-r and --gc-sections may not be used together");
+    if (Config->ICF)
+      error("-r and --icf may not be used together");
+    if (Config->Pie)
+      error("-r and -pie may not be used together");
+  }
+}
+
+// Returns the value of the last occurrence of option Key, or Default if
+// the option was not given.
+static StringRef getString(opt::InputArgList &Args, unsigned Key,
+                           StringRef Default = "") {
+  if (auto *Arg = Args.getLastArg(Key))
+    return Arg->getValue();
+  return Default;
+}
+
+// Returns the value of the last occurrence of option Key parsed as a
+// decimal integer, or Default if the option was not given. A value that
+// does not parse is reported with error().
+static int getInteger(opt::InputArgList &Args, unsigned Key, int Default) {
+  int V = Default;
+  if (auto *Arg = Args.getLastArg(Key)) {
+    StringRef S = Arg->getValue();
+    if (S.getAsInteger(10, V))
+      error(Arg->getSpelling() + ": number expected, but got " + S);
+  }
+  return V;
+}
+
+// Returns the --reproduce path; falls back to the LLD_REPRODUCE environment
+// variable, which yields nullptr when it is unset.
+static const char *getReproduceOption(opt::InputArgList &Args) {
+  if (auto *Arg = Args.getLastArg(OPT_reproduce))
+    return Arg->getValue();
+  return getenv("LLD_REPRODUCE");
+}
+
+// Returns true if "-z <Key>" was given.
+static bool hasZOption(opt::InputArgList &Args, StringRef Key) {
+  for (auto *Arg : Args.filtered(OPT_z))
+    if (Key == Arg->getValue())
+      return true;
+  return false;
+}
+
+// Returns the integer value of the first "-z <Key>=<value>" option, or
+// Default if there is none. The value may be written in any radix that
+// getAsInteger(0, ...) auto-detects.
+//
+// A malformed value is reported with error() and Default is returned.
+// getAsInteger() does not write its output argument on failure, so
+// returning the local result in that case would read an uninitialized
+// variable.
+static uint64_t getZOptionValue(opt::InputArgList &Args, StringRef Key,
+                                uint64_t Default) {
+  for (auto *Arg : Args.filtered(OPT_z)) {
+    StringRef Value = Arg->getValue();
+    size_t Pos = Value.find('=');
+    if (Pos == StringRef::npos || Key != Value.substr(0, Pos))
+      continue;
+
+    Value = Value.substr(Pos + 1);
+    uint64_t Result = Default;
+    if (Value.getAsInteger(0, Result)) {
+      error("invalid " + Key + ": " + Value);
+      return Default;
+    }
+    return Result;
+  }
+  return Default;
+}
+
+// Driver entry point: parses the command line, handles the options that
+// stop the link early (-help, -version), sets up --reproduce, reads the
+// configuration and input files, and dispatches to link<ELFT>() for the
+// selected ELF kind.
+void LinkerDriver::main(ArrayRef<const char *> ArgsArr, bool CanExitEarly) {
+  ELFOptTable Parser;
+  opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));
+
+  // Interpret this flag early because error() depends on it.
+  Config->ErrorLimit = getInteger(Args, OPT_error_limit, 20);
+
+  // Handle -help
+  if (Args.hasArg(OPT_help)) {
+    printHelp(ArgsArr[0]);
+    return;
+  }
+
+  // GNU linkers disagree here. Though both -version and -v are mentioned
+  // in help to print the version information, GNU ld just normally exits,
+  // while gold can continue linking. We are compatible with ld.bfd here.
+  bool VersionOnly = Args.hasArg(OPT_version);
+  if (VersionOnly || Args.hasArg(OPT_v))
+    outs() << getLLDVersion() << "\n";
+  if (VersionOnly)
+    return;
+
+  Config->ExitEarly = CanExitEarly && !Args.hasArg(OPT_full_shutdown);
+
+  if (const char *Path = getReproduceOption(Args)) {
+    // Note that --reproduce is a debug option so you can ignore it
+    // if you are trying to understand the whole picture of the code.
+    Expected<std::unique_ptr<TarWriter>> WriterOrErr =
+        TarWriter::create(Path, path::stem(Path));
+    if (!WriterOrErr) {
+      error(Twine("--reproduce: failed to open ") + Path + ": " +
+            toString(WriterOrErr.takeError()));
+    } else {
+      Tar = WriterOrErr->get();
+      Tar->append("response.txt", createResponseFile(Args));
+      Tar->append("version.txt", getLLDVersion() + "\n");
+      // Hand the writer over to a make<>-allocated owner.
+      make<std::unique_ptr<TarWriter>>(std::move(*WriterOrErr));
+    }
+  }
+
+  readConfigs(Args);
+  initLLVM(Args);
+  createFiles(Args);
+  inferMachineType();
+  checkOptions(Args);
+  if (ErrorCount)
+    return;
+
+  switch (Config->EKind) {
+  case ELF32LEKind:
+    link<ELF32LE>(Args);
+    break;
+  case ELF32BEKind:
+    link<ELF32BE>(Args);
+    break;
+  case ELF64LEKind:
+    link<ELF64LE>(Args);
+    break;
+  case ELF64BEKind:
+    link<ELF64BE>(Args);
+    break;
+  default:
+    llvm_unreachable("unknown Config->EKind");
+  }
+}
+
+// Decides how unresolved symbols are treated. Precedence: --noinhibit-exec,
+// then --no-undefined / -z defs, then -r, then --unresolved-symbols.
+static UnresolvedPolicy getUnresolvedSymbolOption(opt::InputArgList &Args) {
+  if (Args.hasArg(OPT_noinhibit_exec))
+    return UnresolvedPolicy::Warn;
+  if (Args.hasArg(OPT_no_undefined) || hasZOption(Args, "defs"))
+    return UnresolvedPolicy::NoUndef;
+  if (Config->Relocatable)
+    return UnresolvedPolicy::Ignore;
+
+  auto *Arg = Args.getLastArg(OPT_unresolved_symbols);
+  if (!Arg)
+    return UnresolvedPolicy::ReportError;
+
+  StringRef Mode = Arg->getValue();
+  if (Mode == "ignore-all" || Mode == "ignore-in-object-files")
+    return UnresolvedPolicy::Ignore;
+  if (Mode != "ignore-in-shared-libs" && Mode != "report-all")
+    error("unknown --unresolved-symbols value: " + Mode);
+  return UnresolvedPolicy::ReportError;
+}
+
+// Maps --target2=<abs|rel|got-rel> to its policy; got-rel is the default
+// and also what an unknown (reported) value falls back to.
+static Target2Policy getTarget2Option(opt::InputArgList &Args) {
+  auto *Arg = Args.getLastArg(OPT_target2);
+  if (!Arg)
+    return Target2Policy::GotRel;
+
+  StringRef Kind = Arg->getValue();
+  if (Kind == "abs")
+    return Target2Policy::Abs;
+  if (Kind == "rel")
+    return Target2Policy::Rel;
+  if (Kind != "got-rel")
+    error("unknown --target2 option: " + Kind);
+  return Target2Policy::GotRel;
+}
+
+// Returns true for --oformat=binary. Any other --oformat value is an error.
+static bool isOutputFormatBinary(opt::InputArgList &Args) {
+  auto *Arg = Args.getLastArg(OPT_oformat);
+  if (!Arg)
+    return false;
+
+  StringRef Format = Arg->getValue();
+  if (Format == "binary")
+    return true;
+  error("unknown --oformat value: " + Format);
+  return false;
+}
+
+// Resolves a pair of opposing flags: the last of K1/K2 on the command line
+// wins (true for K1, false for K2); Default if neither was given.
+static bool getArg(opt::InputArgList &Args, unsigned K1, unsigned K2,
+                   bool Default) {
+  auto *Arg = Args.getLastArg(K1, K2);
+  return Arg ? Arg->getOption().getID() == K1 : Default;
+}
+
+// Maps the last --discard-{all,locals,none} option to a policy. With -r
+// nothing is discarded regardless of these options.
+static DiscardPolicy getDiscardOption(opt::InputArgList &Args) {
+  if (Config->Relocatable)
+    return DiscardPolicy::None;
+
+  auto *Arg =
+      Args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none);
+  if (!Arg)
+    return DiscardPolicy::Default;
+
+  switch (Arg->getOption().getID()) {
+  case OPT_discard_all:
+    return DiscardPolicy::All;
+  case OPT_discard_locals:
+    return DiscardPolicy::Locals;
+  default:
+    return DiscardPolicy::None;
+  }
+}
+
+// Maps the last --strip-{all,debug} option to a policy.
+static StripPolicy getStripOption(opt::InputArgList &Args) {
+  auto *Arg = Args.getLastArg(OPT_strip_all, OPT_strip_debug);
+  if (!Arg)
+    return StripPolicy::None;
+  return Arg->getOption().getID() == OPT_strip_all ? StripPolicy::All
+                                                   : StripPolicy::Debug;
+}
+
+// Parses a hexadecimal section address with an optional "0x" prefix.
+// Reports an error naming Arg and returns 0 if S is not a hex number.
+static uint64_t parseSectionAddress(StringRef S, opt::Arg *Arg) {
+  StringRef Digits = S.startswith("0x") ? S.drop_front(2) : S;
+  uint64_t VA = 0;
+  if (Digits.getAsInteger(16, VA))
+    error("invalid argument: " + toString(Arg));
+  return VA;
+}
+
+// Collects section start addresses from --section-start=<name>=<addr>;
+// -Ttext, -Tdata and -Tbss are applied afterwards and therefore override
+// a --section-start for the same section.
+static StringMap<uint64_t> getSectionStartMap(opt::InputArgList &Args) {
+  StringMap<uint64_t> Map;
+  for (auto *Arg : Args.filtered(OPT_section_start)) {
+    std::pair<StringRef, StringRef> NameAndAddr =
+        StringRef(Arg->getValue()).split('=');
+    Map[NameAndAddr.first] = parseSectionAddress(NameAndAddr.second, Arg);
+  }
+
+  if (auto *Arg = Args.getLastArg(OPT_Ttext))
+    Map[".text"] = parseSectionAddress(Arg->getValue(), Arg);
+  if (auto *Arg = Args.getLastArg(OPT_Tdata))
+    Map[".data"] = parseSectionAddress(Arg->getValue(), Arg);
+  if (auto *Arg = Args.getLastArg(OPT_Tbss))
+    Map[".bss"] = parseSectionAddress(Arg->getValue(), Arg);
+  return Map;
+}
+
+// Maps --sort-section=<alignment|name> to a policy. Any other non-empty
+// value is reported and treated as the default.
+static SortSectionPolicy getSortKind(opt::InputArgList &Args) {
+  StringRef Rule = getString(Args, OPT_sort_section);
+  if (Rule.empty())
+    return SortSectionPolicy::Default;
+  if (Rule == "alignment")
+    return SortSectionPolicy::Alignment;
+  if (Rule == "name")
+    return SortSectionPolicy::Name;
+  error("unknown --sort-section rule: " + Rule);
+  return SortSectionPolicy::Default;
+}
+
+// Splits a buffer into lines, trimming surrounding whitespace from each
+// and dropping the ones that end up empty.
+static std::vector<StringRef> getLines(MemoryBufferRef MB) {
+  SmallVector<StringRef, 0> RawLines;
+  MB.getBuffer().split(RawLines, '\n');
+
+  std::vector<StringRef> Lines;
+  for (StringRef Line : RawLines) {
+    StringRef Trimmed = Line.trim();
+    if (Trimmed.empty())
+      continue;
+    Lines.push_back(Trimmed);
+  }
+  return Lines;
+}
+
+// Initializes Config members by the command line options.
+// The order of the assignments below matters in a few places:
+// Config->Relocatable is read by getDiscardOption() and
+// getUnresolvedSymbolOption(), Config->Pic is derived from Pie/Shared,
+// and --retain-symbols-file overrides the Discard policy chosen earlier.
+void LinkerDriver::readConfigs(opt::InputArgList &Args) {
+  for (auto *Arg : Args.filtered(OPT_L))
+    Config->SearchPaths.push_back(Arg->getValue());
+
+  // All -rpath values are joined into a single ':'-separated string.
+  std::vector<StringRef> RPaths;
+  for (auto *Arg : Args.filtered(OPT_rpath))
+    RPaths.push_back(Arg->getValue());
+  if (!RPaths.empty())
+    Config->RPath = llvm::join(RPaths.begin(), RPaths.end(), ":");
+
+  if (auto *Arg = Args.getLastArg(OPT_m)) {
+    // Parse ELF{32,64}{LE,BE} and CPU type.
+    StringRef S = Arg->getValue();
+    std::tie(Config->EKind, Config->EMachine, Config->OSABI) =
+        parseEmulation(S);
+    Config->MipsN32Abi = (S == "elf32btsmipn32" || S == "elf32ltsmipn32");
+    Config->Emulation = S;
+  }
+
+  // Plain boolean switches.
+  Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition);
+  Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic);
+  Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions);
+  Config->Demangle = getArg(Args, OPT_demangle, OPT_no_demangle, true);
+  Config->DisableVerify = Args.hasArg(OPT_disable_verify);
+  Config->EhFrameHdr = Args.hasArg(OPT_eh_frame_hdr);
+  Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags);
+  Config->ExportDynamic = Args.hasArg(OPT_export_dynamic);
+  Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings);
+  Config->GcSections = getArg(Args, OPT_gc_sections, OPT_no_gc_sections, false);
+  Config->GdbIndex = Args.hasArg(OPT_gdb_index);
+  Config->ICF = Args.hasArg(OPT_icf);
+  Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique);
+  Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version);
+  Config->Nostdlib = Args.hasArg(OPT_nostdlib);
+  Config->OMagic = Args.hasArg(OPT_omagic);
+  Config->Pie = getArg(Args, OPT_pie, OPT_nopie, false);
+  Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections);
+  Config->Relocatable = Args.hasArg(OPT_relocatable);
+  // getDiscardOption() reads Config->Relocatable, which was set just above.
+  Config->Discard = getDiscardOption(Args);
+  Config->SaveTemps = Args.hasArg(OPT_save_temps);
+  Config->SingleRoRx = Args.hasArg(OPT_no_rosegment);
+  Config->Shared = Args.hasArg(OPT_shared);
+  Config->Target1Rel = getArg(Args, OPT_target1_rel, OPT_target1_abs, false);
+  Config->Threads = getArg(Args, OPT_threads, OPT_no_threads, true);
+  Config->Trace = Args.hasArg(OPT_trace);
+  Config->Verbose = Args.hasArg(OPT_verbose);
+  Config->WarnCommon = Args.hasArg(OPT_warn_common);
+
+  // String-valued options.
+  Config->DynamicLinker = getString(Args, OPT_dynamic_linker);
+  Config->Entry = getString(Args, OPT_entry);
+  Config->Fini = getString(Args, OPT_fini, "_fini");
+  Config->Init = getString(Args, OPT_init, "_init");
+  Config->LTOAAPipeline = getString(Args, OPT_lto_aa_pipeline);
+  Config->LTONewPmPasses = getString(Args, OPT_lto_newpm_passes);
+  Config->OutputFile = getString(Args, OPT_o);
+  Config->SoName = getString(Args, OPT_soname);
+  Config->Sysroot = getString(Args, OPT_sysroot);
+
+  // Integer-valued options, with range checks.
+  Config->Optimize = getInteger(Args, OPT_O, 1);
+  Config->LTOO = getInteger(Args, OPT_lto_O, 2);
+  if (Config->LTOO > 3)
+    error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O));
+  Config->LTOPartitions = getInteger(Args, OPT_lto_partitions, 1);
+  if (Config->LTOPartitions == 0)
+    error("--lto-partitions: number of threads must be > 0");
+  Config->ThinLTOJobs = getInteger(Args, OPT_thinlto_jobs, -1u);
+  if (Config->ThinLTOJobs == 0)
+    error("--thinlto-jobs: number of threads must be > 0");
+
+  // -z <keyword> options. combreloc and relro are on unless disabled.
+  Config->ZCombreloc = !hasZOption(Args, "nocombreloc");
+  Config->ZExecstack = hasZOption(Args, "execstack");
+  Config->ZNodelete = hasZOption(Args, "nodelete");
+  Config->ZNow = hasZOption(Args, "now");
+  Config->ZOrigin = hasZOption(Args, "origin");
+  Config->ZRelro = !hasZOption(Args, "norelro");
+  Config->ZStackSize = getZOptionValue(Args, "stack-size", -1);
+  Config->ZWxneeded = hasZOption(Args, "wxneeded");
+
+  Config->OFormatBinary = isOutputFormatBinary(Args);
+  Config->SectionStartMap = getSectionStartMap(Args);
+  Config->SortSection = getSortKind(Args);
+  Config->Target2 = getTarget2Option(Args);
+  Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args);
+
+  // --omagic is an option to create old-fashioned executables in which
+  // .text segments are writable. Today, the option is still in use to
+  // create special-purpose programs such as boot loaders. It doesn't
+  // make sense to create PT_GNU_RELRO for such executables.
+  if (Config->OMagic)
+    Config->ZRelro = false;
+
+  // With -r, Strip keeps its default member initializer (StripPolicy::None).
+  if (!Config->Relocatable)
+    Config->Strip = getStripOption(Args);
+
+  // Config->Pic is true if we are generating position-independent code.
+  Config->Pic = Config->Pie || Config->Shared;
+
+  // --hash-style: "gnu" replaces the SysV table, "both" emits both, and
+  // "sysv" keeps the defaults.
+  if (auto *Arg = Args.getLastArg(OPT_hash_style)) {
+    StringRef S = Arg->getValue();
+    if (S == "gnu") {
+      Config->GnuHash = true;
+      Config->SysvHash = false;
+    } else if (S == "both") {
+      Config->GnuHash = true;
+    } else if (S != "sysv")
+      error("unknown hash style: " + S);
+  }
+
+  // Parse --build-id or --build-id=<style>. When both forms are given,
+  // the --build-id=<style> form wins because it is handled last.
+  if (Args.hasArg(OPT_build_id))
+    Config->BuildId = BuildIdKind::Fast;
+  if (auto *Arg = Args.getLastArg(OPT_build_id_eq)) {
+    StringRef S = Arg->getValue();
+    if (S == "md5") {
+      Config->BuildId = BuildIdKind::Md5;
+    } else if (S == "sha1" || S == "tree") {
+      Config->BuildId = BuildIdKind::Sha1;
+    } else if (S == "uuid") {
+      Config->BuildId = BuildIdKind::Uuid;
+    } else if (S == "none") {
+      Config->BuildId = BuildIdKind::None;
+    } else if (S.startswith("0x")) {
+      Config->BuildId = BuildIdKind::Hexstring;
+      Config->BuildIdVector = parseHex(S.substr(2));
+    } else {
+      error("unknown --build-id style: " + S);
+    }
+  }
+
+  for (auto *Arg : Args.filtered(OPT_auxiliary))
+    Config->AuxiliaryList.push_back(Arg->getValue());
+  if (!Config->Shared && !Config->AuxiliaryList.empty())
+    error("-f may not be used without -shared");
+
+  for (auto *Arg : Args.filtered(OPT_undefined))
+    Config->Undefined.push_back(Arg->getValue());
+
+  if (auto *Arg = Args.getLastArg(OPT_dynamic_list))
+    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
+      readDynamicList(*Buffer);
+
+  if (auto *Arg = Args.getLastArg(OPT_symbol_ordering_file))
+    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
+      Config->SymbolOrderingFile = getLines(*Buffer);
+
+  // If --retain-symbols-file is used, we'll retain only the symbols listed in
+  // the file and discard all others.
+  if (auto *Arg = Args.getLastArg(OPT_retain_symbols_file)) {
+    Config->Discard = DiscardPolicy::RetainFile;
+    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
+      for (StringRef S : getLines(*Buffer))
+        Config->RetainSymbolsFile.insert(S);
+  }
+
+  for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol))
+    Config->VersionScriptGlobals.push_back(
+        {Arg->getValue(), /*IsExternCpp*/ false, /*HasWildcard*/ false});
+
+  // Dynamic lists are a simplified linker script that doesn't need the
+  // "global:" and implicitly ends with a "local:*". Set the variables needed to
+  // simulate that.
+  if (Args.hasArg(OPT_dynamic_list) || Args.hasArg(OPT_export_dynamic_symbol)) {
+    Config->ExportDynamic = true;
+    if (!Config->Shared)
+      Config->DefaultSymbolVersion = VER_NDX_LOCAL;
+  }
+
+  if (auto *Arg = Args.getLastArg(OPT_version_script))
+    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
+      readVersionScript(*Buffer);
+}
+
+// Returns a value of "-format" option.
+static bool getBinaryOption(StringRef S) {
+  if (S == "binary")
+    return true;
+  // "elf" and "default" both select regular input handling; anything
+  // else is reported and treated the same way.
+  if (S != "elf" && S != "default")
+    error("unknown -format value: " + S +
+          " (supported formats: elf, default, binary)");
+  return false;
+}
+
+// Walks the command line in order. Inputs (-l, plain files, scripts) are
+// added as they are seen, under whatever positional state (-format,
+// --as-needed, -Bstatic, --whole-archive, --start-lib) is in effect at
+// that point.
+void LinkerDriver::createFiles(opt::InputArgList &Args) {
+  for (auto *Arg : Args) {
+    switch (Arg->getOption().getID()) {
+    // Inputs.
+    case OPT_INPUT:
+      addFile(Arg->getValue());
+      break;
+    case OPT_l:
+      addLibrary(Arg->getValue());
+      break;
+    case OPT_alias_script_T:
+    case OPT_script:
+      if (Optional<MemoryBufferRef> MB = readFile(Arg->getValue()))
+        readLinkerScript(*MB);
+      break;
+
+    // Positional state toggles.
+    case OPT_format:
+      InBinary = getBinaryOption(Arg->getValue());
+      break;
+    case OPT_as_needed:
+      Config->AsNeeded = true;
+      break;
+    case OPT_no_as_needed:
+      Config->AsNeeded = false;
+      break;
+    case OPT_Bstatic:
+      Config->Static = true;
+      break;
+    case OPT_Bdynamic:
+      Config->Static = false;
+      break;
+    case OPT_whole_archive:
+      InWholeArchive = true;
+      break;
+    case OPT_no_whole_archive:
+      InWholeArchive = false;
+      break;
+    case OPT_start_lib:
+      InLib = true;
+      break;
+    case OPT_end_lib:
+      InLib = false;
+      break;
+    }
+  }
+
+  // Don't pile this on top of errors that were already reported.
+  if (Files.empty() && !ErrorCount)
+    error("no input files");
+}
+
+// If -m <machine_type> was not given, infer it from object files.
+void LinkerDriver::inferMachineType() {
+  if (Config->EKind != ELFNoneKind)
+    return;
+
+  // The first input with a known ELF kind decides.
+  for (InputFile *File : Files) {
+    if (File->EKind != ELFNoneKind) {
+      Config->EKind = File->EKind;
+      Config->EMachine = File->EMachine;
+      Config->OSABI = File->OSABI;
+      Config->MipsN32Abi = Config->EMachine == EM_MIPS && isMipsN32Abi(File);
+      return;
+    }
+  }
+  error("target emulation unknown: -m or at least one .o file required");
+}
+
+// Parse -z max-page-size=<value>. The default value is defined by
+// each target.
+//
+// A value that is not a power of 2 (including 0) is reported and the
+// target default is returned in its place. error() does not stop the link
+// right away, and getImageBase() computes "V % Config->MaxPageSize", so
+// handing back a rejected 0 would divide by zero.
+// NOTE(review): this relies on Target->DefaultMaxPageSize being nonzero --
+// confirm against the target definitions.
+static uint64_t getMaxPageSize(opt::InputArgList &Args) {
+  uint64_t Val =
+      getZOptionValue(Args, "max-page-size", Target->DefaultMaxPageSize);
+  if (!isPowerOf2_64(Val)) {
+    error("max-page-size: value isn't a power of 2");
+    return Target->DefaultMaxPageSize;
+  }
+  return Val;
+}
+
+// Parses -image-base option.
+// Reads Config->Pic and Config->MaxPageSize, so both must already be set.
+static uint64_t getImageBase(opt::InputArgList &Args) {
+  // Use default if no -image-base option is given.
+  // Because we are using "Target" here, this function
+  // has to be called after the variable is initialized.
+  auto *Arg = Args.getLastArg(OPT_image_base);
+  if (!Arg)
+    return Config->Pic ? 0 : Target->DefaultImageBase;
+
+  StringRef S = Arg->getValue();
+  uint64_t V;
+  if (S.getAsInteger(0, V)) {
+    error("-image-base: number expected, but got " + S);
+    return 0;
+  }
+  if ((V % Config->MaxPageSize) != 0)
+    warn("-image-base: address isn't multiple of page size: " + S);
+  return V;
+}
+
+// Do actual linking. Note that when this function is called,
+// all linker scripts have already been parsed.
+template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
+  // The symbol table lives on this stack frame; the global pointer set on
+  // the next line refers to it.
+  SymbolTable<ELFT> Symtab;
+  elf::Symtab<ELFT>::X = &Symtab;
+  Target = createTarget();
+  ScriptBase = Script<ELFT>::X = make<LinkerScript<ELFT>>();
+
+  Config->Rela =
+      ELFT::Is64Bits || Config->EMachine == EM_X86_64 || Config->MipsN32Abi;
+  Config->Mips64EL =
+      (Config->EMachine == EM_MIPS && Config->EKind == ELF64LEKind);
+  // MaxPageSize has to be set first: getImageBase() reads it.
+  Config->MaxPageSize = getMaxPageSize(Args);
+  Config->ImageBase = getImageBase(Args);
+
+  // Default output filename is "a.out" by the Unix tradition.
+  if (Config->OutputFile.empty())
+    Config->OutputFile = "a.out";
+
+  // Use default entry point name if no name was given via the command
+  // line nor linker scripts. For some reason, MIPS entry point name is
+  // different from others.
+  Config->WarnMissingEntry =
+      (!Config->Entry.empty() || (!Config->Shared && !Config->Relocatable));
+  if (Config->Entry.empty() && !Config->Relocatable)
+    Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start";
+
+  // Handle --trace-symbol.
+  for (auto *Arg : Args.filtered(OPT_trace_symbol))
+    Symtab.trace(Arg->getValue());
+
+  // Add all files to the symbol table. This will add almost all
+  // symbols that we need to the symbol table.
+  for (InputFile *F : Files)
+    Symtab.addFile(F);
+
+  // If an entry symbol is in a static archive, pull out that file now
+  // to complete the symbol table. After this, no new names except a
+  // few linker-synthesized ones will be added to the symbol table.
+  if (Symtab.find(Config->Entry))
+    Symtab.addUndefined(Config->Entry);
+
+  // Return if there were name resolution errors.
+  if (ErrorCount)
+    return;
+
+  Symtab.scanUndefinedFlags();
+  Symtab.scanShlibUndefined();
+  Symtab.scanVersionScript();
+
+  Symtab.addCombinedLTOObject();
+  if (ErrorCount)
+    return;
+
+  for (auto *Arg : Args.filtered(OPT_wrap))
+    Symtab.wrap(Arg->getValue());
+
+  // Now that we have a complete list of input files.
+  // Beyond this point, no new files are added.
+  // Aggregate all input sections into one place.
+  for (elf::ObjectFile<ELFT> *F : Symtab.getObjectFiles())
+    for (InputSectionBase<ELFT> *S : F->getSections())
+      if (S && S != &InputSection<ELFT>::Discarded)
+        Symtab.Sections.push_back(S);
+  for (BinaryFile *F : Symtab.getBinaryFiles())
+    for (InputSectionData *S : F->getSections())
+      Symtab.Sections.push_back(cast<InputSection<ELFT>>(S));
+
+  // Do size optimizations: garbage collection and identical code folding.
+  if (Config->GcSections)
+    markLive<ELFT>();
+  if (Config->ICF)
+    doIcf<ELFT>();
+
+  // MergeInputSection::splitIntoPieces needs to be called before
+  // any call of MergeInputSection::getOffset. Do that.
+  forEach(Symtab.Sections.begin(), Symtab.Sections.end(),
+          [](InputSectionBase<ELFT> *S) {
+            if (!S->Live)
+              return;
+            if (S->isCompressed())
+              S->uncompress();
+            if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S))
+              MS->splitIntoPieces();
+          });
+
+  // Write the result to the file.
+  writeResult<ELFT>();
+}
diff --git a/contrib/llvm/tools/lld/ELF/Driver.h b/contrib/llvm/tools/lld/ELF/Driver.h
new file mode 100644
index 000000000000..b600fae34823
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Driver.h
@@ -0,0 +1,78 @@
+//===- Driver.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_DRIVER_H
+#define LLD_ELF_DRIVER_H
+
+#include "SymbolTable.h"
+#include "lld/Core/LLVM.h"
+#include "lld/Core/Reproduce.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace lld {
+namespace elf {
+
+extern class LinkerDriver *Driver;
+
+// Drives a link: parses the command line, collects the input files and
+// runs the link for the selected ELF kind.
+class LinkerDriver {
+public:
+  void main(ArrayRef<const char *> Args, bool CanExitEarly);
+  void addFile(StringRef Path);
+  void addLibrary(StringRef Name);
+
+private:
+  std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef MB);
+  void readConfigs(llvm::opt::InputArgList &Args);
+  void createFiles(llvm::opt::InputArgList &Args);
+  void inferMachineType();
+  template <class ELFT> void link(llvm::opt::InputArgList &Args);
+
+  // True while we are between --whole-archive and --no-whole-archive.
+  bool InWholeArchive = false;
+
+  // True while we are between --start-lib and --end-lib.
+  bool InLib = false;
+
+  // True while -format=binary is in effect, i.e. until a later
+  // -format=elf or -format=default.
+  bool InBinary = false;
+
+  // Input files in the order they were added.
+  std::vector<InputFile *> Files;
+};
+
+// Parses command line options.
class ELFOptTable : public llvm::opt::OptTable {
public:
  ELFOptTable();

  // Parses Argv into an argument list. The definition in DriverUtils.cpp
  // also expands @response files and reports unknown or incomplete options
  // through error().
  llvm::opt::InputArgList parse(ArrayRef<const char *> Argv);
};

// Create enum with OPT_xxx values for each option in Options.td
enum {
  OPT_INVALID = 0,
// X-macro: every OPTION(...) entry of Options.inc contributes one
// enumerator named after its third argument (ID).
#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID,
#include "ELF/Options.inc"
#undef OPTION
};

// Prints the option help text; Argv0 is the program name shown in it.
void printHelp(const char *Argv0);
std::vector<uint8_t> parseHexstring(StringRef S);

// Rebuilds the command line as response-file text (used for --reproduce).
std::string createResponseFile(const llvm::opt::InputArgList &Args);

// Both return None when no matching file exists in Config->SearchPaths.
llvm::Optional<std::string> findFromSearchPaths(StringRef Path);
llvm::Optional<std::string> searchLibrary(StringRef Path);

} // namespace elf
} // namespace lld

#endif
// diff --git a/contrib/llvm/tools/lld/ELF/DriverUtils.cpp b/contrib/llvm/tools/lld/ELF/DriverUtils.cpp
// new file mode 100644
// index 000000000000..ae76958de65e
// --- /dev/null
// +++ b/contrib/llvm/tools/lld/ELF/DriverUtils.cpp
// @@ -0,0 +1,190 @@
//===- DriverUtils.cpp ----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains utility functions for the driver. Because there
// are so many small functions, we created this separate file to make
// Driver.cpp less cluttered.
+// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Error.h" +#include "Memory.h" +#include "ScriptParser.h" +#include "lld/Config/Version.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" + +using namespace llvm; +using namespace llvm::sys; + +using namespace lld; +using namespace lld::elf; + +// Create OptTable + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "ELF/Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const opt::OptTable::Info OptInfo[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ + {X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, \ + X8, X7, OPT_##GROUP, OPT_##ALIAS, X6}, +#include "ELF/Options.inc" +#undef OPTION +}; + +ELFOptTable::ELFOptTable() : OptTable(OptInfo) {} + +// Parse -color-diagnostics={auto,always,never} or -no-color-diagnostics. 
+static bool getColorDiagnostics(opt::InputArgList &Args) { + bool Default = (ErrorOS == &errs() && Process::StandardErrHasColors()); + + auto *Arg = Args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq, + OPT_no_color_diagnostics); + if (!Arg) + return Default; + if (Arg->getOption().getID() == OPT_color_diagnostics) + return true; + if (Arg->getOption().getID() == OPT_no_color_diagnostics) + return false; + + StringRef S = Arg->getValue(); + if (S == "auto") + return Default; + if (S == "always") + return true; + if (S != "never") + error("unknown option: -color-diagnostics=" + S); + return false; +} + +static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_rsp_quoting)) { + StringRef S = Arg->getValue(); + if (S != "windows" && S != "posix") + error("invalid response file quoting: " + S); + if (S == "windows") + return cl::TokenizeWindowsCommandLine; + return cl::TokenizeGNUCommandLine; + } + if (Triple(sys::getProcessTriple()).getOS() == Triple::Win32) + return cl::TokenizeWindowsCommandLine; + return cl::TokenizeGNUCommandLine; +} + +// Parses a given list of options. +opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) { + // Make InputArgList from string vectors. + unsigned MissingIndex; + unsigned MissingCount; + SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size()); + + // We need to get the quoting style for response files before parsing all + // options so we parse here before and ignore all the options but + // --rsp-quoting. + opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount); + + // Expand response files (arguments in the form of @<filename>) + // and then parse the argument again. + cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Vec); + Args = this->ParseArgs(Vec, MissingIndex, MissingCount); + + // Interpret -color-diagnostics early so that error messages + // for unknown flags are colored. 
+ Config->ColorDiagnostics = getColorDiagnostics(Args); + if (MissingCount) + error(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); + + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + error("unknown argument: " + Arg->getSpelling()); + return Args; +} + +void elf::printHelp(const char *Argv0) { + ELFOptTable Table; + Table.PrintHelp(outs(), Argv0, "lld", false); +} + +// Reconstructs command line arguments so that so that you can re-run +// the same command with the same inputs. This is for --reproduce. +std::string elf::createResponseFile(const opt::InputArgList &Args) { + SmallString<0> Data; + raw_svector_ostream OS(Data); + + // Copy the command line to the output while rewriting paths. + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_reproduce: + break; + case OPT_INPUT: + OS << quote(rewritePath(Arg->getValue())) << "\n"; + break; + case OPT_L: + case OPT_dynamic_list: + case OPT_rpath: + case OPT_alias_script_T: + case OPT_script: + case OPT_version_script: + OS << Arg->getSpelling() << " " << quote(rewritePath(Arg->getValue())) + << "\n"; + break; + default: + OS << toString(Arg) << "\n"; + } + } + return Data.str(); +} + +// Find a file by concatenating given paths. If a resulting path +// starts with "=", the character is replaced with a --sysroot value. +static Optional<std::string> findFile(StringRef Path1, const Twine &Path2) { + SmallString<128> S; + if (Path1.startswith("=")) + path::append(S, Config->Sysroot, Path1.substr(1), Path2); + else + path::append(S, Path1, Path2); + + if (fs::exists(S)) + return S.str().str(); + return None; +} + +Optional<std::string> elf::findFromSearchPaths(StringRef Path) { + for (StringRef Dir : Config->SearchPaths) + if (Optional<std::string> S = findFile(Dir, Path)) + return S; + return None; +} + +// This is for -lfoo. We'll look for libfoo.so or libfoo.a from +// search paths. 
+Optional<std::string> elf::searchLibrary(StringRef Name) { + if (Name.startswith(":")) + return findFromSearchPaths(Name.substr(1)); + + for (StringRef Dir : Config->SearchPaths) { + if (!Config->Static) + if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".so")) + return S; + if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".a")) + return S; + } + return None; +} diff --git a/contrib/llvm/tools/lld/ELF/EhFrame.cpp b/contrib/llvm/tools/lld/ELF/EhFrame.cpp new file mode 100644 index 000000000000..2428473d9012 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/EhFrame.cpp @@ -0,0 +1,214 @@ +//===- EhFrame.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// .eh_frame section contains information on how to unwind the stack when +// an exception is thrown. The section consists of sequence of CIE and FDE +// records. The linker needs to merge CIEs and associate FDEs to CIEs. +// That means the linker has to understand the format of the section. +// +// This file contains a few utility functions to read .eh_frame contents. 
+// +//===----------------------------------------------------------------------===// + +#include "EhFrame.h" +#include "Error.h" +#include "InputSection.h" +#include "Relocations.h" +#include "Strings.h" + +#include "llvm/Object/ELF.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::dwarf; +using namespace llvm::object; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +namespace { +template <class ELFT> class EhReader { +public: + EhReader(InputSectionBase<ELFT> *S, ArrayRef<uint8_t> D) : IS(S), D(D) {} + size_t readEhRecordSize(); + uint8_t getFdeEncoding(); + +private: + template <class P> void failOn(const P *Loc, const Twine &Msg) { + fatal(IS->getLocation((const uint8_t *)Loc - IS->Data.data()) + ": " + Msg); + } + + uint8_t readByte(); + void skipBytes(size_t Count); + StringRef readString(); + void skipLeb128(); + void skipAugP(); + + InputSectionBase<ELFT> *IS; + ArrayRef<uint8_t> D; +}; +} + +template <class ELFT> +size_t elf::readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off) { + return EhReader<ELFT>(S, S->Data.slice(Off)).readEhRecordSize(); +} +// .eh_frame section is a sequence of records. Each record starts with +// a 4 byte length field. This function reads the length. +template <class ELFT> size_t EhReader<ELFT>::readEhRecordSize() { + const endianness E = ELFT::TargetEndianness; + if (D.size() < 4) + failOn(D.data(), "CIE/FDE too small"); + + // First 4 bytes of CIE/FDE is the size of the record. + // If it is 0xFFFFFFFF, the next 8 bytes contain the size instead, + // but we do not support that format yet. + uint64_t V = read32<E>(D.data()); + if (V == UINT32_MAX) + failOn(D.data(), "CIE/FDE too large"); + uint64_t Size = V + 4; + if (Size > D.size()) + failOn(D.data(), "CIE/FDE ends past the end of the section"); + return Size; +} + +// Read a byte and advance D by one byte. 
+template <class ELFT> uint8_t EhReader<ELFT>::readByte() { + if (D.empty()) + failOn(D.data(), "unexpected end of CIE"); + uint8_t B = D.front(); + D = D.slice(1); + return B; +} + +template <class ELFT> void EhReader<ELFT>::skipBytes(size_t Count) { + if (D.size() < Count) + failOn(D.data(), "CIE is too small"); + D = D.slice(Count); +} + +// Read a null-terminated string. +template <class ELFT> StringRef EhReader<ELFT>::readString() { + const uint8_t *End = std::find(D.begin(), D.end(), '\0'); + if (End == D.end()) + failOn(D.data(), "corrupted CIE (failed to read string)"); + StringRef S = toStringRef(D.slice(0, End - D.begin())); + D = D.slice(S.size() + 1); + return S; +} + +// Skip an integer encoded in the LEB128 format. +// Actual number is not of interest because only the runtime needs it. +// But we need to be at least able to skip it so that we can read +// the field that follows a LEB128 number. +template <class ELFT> void EhReader<ELFT>::skipLeb128() { + const uint8_t *ErrPos = D.data(); + while (!D.empty()) { + uint8_t Val = D.front(); + D = D.slice(1); + if ((Val & 0x80) == 0) + return; + } + failOn(ErrPos, "corrupted CIE (failed to read LEB128)"); +} + +template <class ELFT> static size_t getAugPSize(unsigned Enc) { + switch (Enc & 0x0f) { + case DW_EH_PE_absptr: + case DW_EH_PE_signed: + return ELFT::Is64Bits ? 
8 : 4; + case DW_EH_PE_udata2: + case DW_EH_PE_sdata2: + return 2; + case DW_EH_PE_udata4: + case DW_EH_PE_sdata4: + return 4; + case DW_EH_PE_udata8: + case DW_EH_PE_sdata8: + return 8; + } + return 0; +} + +template <class ELFT> void EhReader<ELFT>::skipAugP() { + uint8_t Enc = readByte(); + if ((Enc & 0xf0) == DW_EH_PE_aligned) + failOn(D.data() - 1, "DW_EH_PE_aligned encoding is not supported"); + size_t Size = getAugPSize<ELFT>(Enc); + if (Size == 0) + failOn(D.data() - 1, "unknown FDE encoding"); + if (Size >= D.size()) + failOn(D.data() - 1, "corrupted CIE"); + D = D.slice(Size); +} + +template <class ELFT> uint8_t elf::getFdeEncoding(EhSectionPiece *P) { + auto *IS = static_cast<InputSectionBase<ELFT> *>(P->ID); + return EhReader<ELFT>(IS, P->data()).getFdeEncoding(); +} + +template <class ELFT> uint8_t EhReader<ELFT>::getFdeEncoding() { + skipBytes(8); + int Version = readByte(); + if (Version != 1 && Version != 3) + failOn(D.data() - 1, + "FDE version 1 or 3 expected, but got " + Twine(Version)); + + StringRef Aug = readString(); + + // Skip code and data alignment factors. + skipLeb128(); + skipLeb128(); + + // Skip the return address register. In CIE version 1 this is a single + // byte. In CIE version 3 this is an unsigned LEB128. + if (Version == 1) + readByte(); + else + skipLeb128(); + + // We only care about an 'R' value, but other records may precede an 'R' + // record. Unfortunately records are not in TLV (type-length-value) format, + // so we need to teach the linker how to skip records for each type. 
+ for (char C : Aug) { + if (C == 'R') + return readByte(); + if (C == 'z') { + skipLeb128(); + continue; + } + if (C == 'P') { + skipAugP(); + continue; + } + if (C == 'L') { + readByte(); + continue; + } + failOn(Aug.data(), "unknown .eh_frame augmentation string: " + Aug); + } + return DW_EH_PE_absptr; +} + +template size_t elf::readEhRecordSize<ELF32LE>(InputSectionBase<ELF32LE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF32BE>(InputSectionBase<ELF32BE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF64LE>(InputSectionBase<ELF64LE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF64BE>(InputSectionBase<ELF64BE> *S, + size_t Off); + +template uint8_t elf::getFdeEncoding<ELF32LE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF32BE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF64LE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF64BE>(EhSectionPiece *P); diff --git a/contrib/llvm/tools/lld/ELF/EhFrame.h b/contrib/llvm/tools/lld/ELF/EhFrame.h new file mode 100644 index 000000000000..cadc93d3a2e4 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/EhFrame.h @@ -0,0 +1,26 @@ +//===- EhFrame.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_EHFRAME_H +#define LLD_ELF_EHFRAME_H + +#include "lld/Core/LLVM.h" + +namespace lld { +namespace elf { +template <class ELFT> class InputSectionBase; +struct EhSectionPiece; + +template <class ELFT> +size_t readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off); +template <class ELFT> uint8_t getFdeEncoding(EhSectionPiece *P); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Error.cpp b/contrib/llvm/tools/lld/ELF/Error.cpp new file mode 100644 index 000000000000..6e30f08143ed --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Error.cpp @@ -0,0 +1,106 @@ +//===- Error.cpp ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Error.h" +#include "Config.h" + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +#include <mutex> + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#endif + +using namespace lld::elf; +using namespace llvm; + +namespace lld { + +uint64_t elf::ErrorCount; +raw_ostream *elf::ErrorOS; +StringRef elf::Argv0; + +// The functions defined in this file can be called from multiple threads, +// but outs() or errs() are not thread-safe. We protect them using a mutex. 
+static std::mutex Mu; + +static void print(StringRef S, raw_ostream::Colors C) { + *ErrorOS << Argv0 + ": "; + if (Config->ColorDiagnostics) { + ErrorOS->changeColor(C, true); + *ErrorOS << S; + ErrorOS->resetColor(); + } else { + *ErrorOS << S; + } +} + +void elf::log(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + if (Config->Verbose) + outs() << Argv0 << ": " << Msg << "\n"; +} + +void elf::warn(const Twine &Msg) { + if (Config->FatalWarnings) { + error(Msg); + return; + } + std::lock_guard<std::mutex> Lock(Mu); + print("warning: ", raw_ostream::MAGENTA); + *ErrorOS << Msg << "\n"; +} + +void elf::error(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + + if (Config->ErrorLimit == 0 || ErrorCount < Config->ErrorLimit) { + print("error: ", raw_ostream::RED); + *ErrorOS << Msg << "\n"; + } else if (ErrorCount == Config->ErrorLimit) { + print("error: ", raw_ostream::RED); + *ErrorOS << "too many errors emitted, stopping now" + << " (use -error-limit=0 to see all errors)\n"; + if (Config->ExitEarly) + exitLld(1); + } + + ++ErrorCount; +} + +void elf::error(std::error_code EC, const Twine &Prefix) { + error(Prefix + ": " + EC.message()); +} + +void elf::exitLld(int Val) { + // Dealloc/destroy ManagedStatic variables before calling + // _exit(). In a non-LTO build, this is a nop. In an LTO + // build allows us to get the output of -time-passes. 
+ llvm_shutdown(); + + outs().flush(); + errs().flush(); + _exit(Val); +} + +void elf::fatal(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + print("error: ", raw_ostream::RED); + *ErrorOS << Msg << "\n"; + exitLld(1); +} + +void elf::fatal(std::error_code EC, const Twine &Prefix) { + fatal(Prefix + ": " + EC.message()); +} + +} // namespace lld diff --git a/contrib/llvm/tools/lld/ELF/Error.h b/contrib/llvm/tools/lld/ELF/Error.h new file mode 100644 index 000000000000..1ec683595cf4 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Error.h @@ -0,0 +1,81 @@ +//===- Error.h --------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// In LLD, we have three levels of errors: fatal, error or warn. +// +// Fatal makes the program exit immediately with an error message. +// You shouldn't use it except for reporting a corrupted input file. +// +// Error prints out an error message and increment a global variable +// ErrorCount to record the fact that we met an error condition. It does +// not exit, so it is safe for a lld-as-a-library use case. It is generally +// useful because it can report more than one errors in a single run. +// +// Warn doesn't do anything but printing out a given message. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ERROR_H +#define LLD_ELF_ERROR_H + +#include "lld/Core/LLVM.h" + +#include "llvm/Support/Error.h" + +namespace lld { +namespace elf { + +extern uint64_t ErrorCount; +extern llvm::raw_ostream *ErrorOS; +extern llvm::StringRef Argv0; + +void log(const Twine &Msg); +void warn(const Twine &Msg); + +void error(const Twine &Msg); +void error(std::error_code EC, const Twine &Prefix); + +LLVM_ATTRIBUTE_NORETURN void exitLld(int Val); +LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix); + +// check() functions are convenient functions to strip errors +// from error-or-value objects. +template <class T> T check(ErrorOr<T> E) { + if (auto EC = E.getError()) + fatal(EC.message()); + return std::move(*E); +} + +template <class T> T check(Expected<T> E) { + if (!E) + handleAllErrors(std::move(E.takeError()), + [](llvm::ErrorInfoBase &EIB) -> Error { + fatal(EIB.message()); + return Error::success(); + }); + return std::move(*E); +} + +template <class T> T check(ErrorOr<T> E, const Twine &Prefix) { + if (auto EC = E.getError()) + fatal(Prefix + ": " + EC.message()); + return std::move(*E); +} + +template <class T> T check(Expected<T> E, const Twine &Prefix) { + if (!E) + fatal(Prefix + ": " + errorToErrorCode(E.takeError()).message()); + return std::move(*E); +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/GdbIndex.cpp b/contrib/llvm/tools/lld/ELF/GdbIndex.cpp new file mode 100644 index 000000000000..762144dd0a96 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/GdbIndex.cpp @@ -0,0 +1,205 @@ +//===- GdbIndex.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// File contains classes for implementation of --gdb-index command line option. +// +// If that option is used, linker should emit a .gdb_index section that allows +// debugger to locate and read .dwo files, containing neccessary debug +// information. +// More information about implementation can be found in DWARF specification, +// latest version is available at http://dwarfstd.org. +// +// .gdb_index section format: +// (Information is based on/taken from +// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html (*)) +// +// A mapped index consists of several areas, laid out in order: +// 1) The file header. +// 2) "The CU (compilation unit) list. This is a sequence of pairs of 64-bit +// little-endian values, sorted by the CU offset. The first element in each +// pair is the offset of a CU in the .debug_info section. The second element +// in each pair is the length of that CU. References to a CU elsewhere in the +// map are done using a CU index, which is just the 0-based index into this +// table. Note that if there are type CUs, then conceptually CUs and type CUs +// form a single list for the purposes of CU indices."(*) +// 3) The types CU list. Depricated as .debug_types does not appear in the DWARF +// v5 specification. +// 4) The address area. The address area is a sequence of address +// entries, where each entrie contains low address, high address and CU +// index. +// 5) "The symbol table. This is an open-addressed hash table. The size of the +// hash table is always a power of 2. Each slot in the hash table consists of +// a pair of offset_type values. The first value is the offset of the +// symbol's name in the constant pool. The second value is the offset of the +// CU vector in the constant pool."(*) +// 6) "The constant pool. This is simply a bunch of bytes. 
It is organized so +// that alignment is correct: CU vectors are stored first, followed by +// strings." (*) +// +// For constructing the .gdb_index section the following steps should be performed: +// 1) For file header nothing special should be done. It contains the offsets to +// the areas below. +// 2) Scan the compilation unit headers of the .debug_info sections to build a +// list of compilation units. +// 3) CU Types are no longer needed as DWARF skeleton type units never made it +// into the standard. lld does nothing to support parsing of .debug_types +// and generates empty types CU area in .gdb_index section. +// 4) Address area entries are extracted from DW_TAG_compile_unit DIEs of +// .debug_info sections. +// 5) For building the symbol table linker extracts the public names from the +// .debug_gnu_pubnames and .debug_gnu_pubtypes sections. Then it builds the +// hashtable according to .gdb_index format specification. +// 6) Constant pool is populated at the same time as symbol table. 
+//===----------------------------------------------------------------------===// + +#include "GdbIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" +#include "llvm/Object/ELFObjectFile.h" + +using namespace llvm; +using namespace llvm::object; +using namespace lld::elf; + +template <class ELFT> +GdbIndexBuilder<ELFT>::GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec) + : DebugInfoSec(DebugInfoSec) { + if (Expected<std::unique_ptr<object::ObjectFile>> Obj = + object::ObjectFile::createObjectFile(DebugInfoSec->getFile()->MB)) + Dwarf.reset(new DWARFContextInMemory(*Obj.get(), this)); + else + error(toString(DebugInfoSec->getFile()) + ": error creating DWARF context"); +} + +template <class ELFT> +std::vector<std::pair<typename ELFT::uint, typename ELFT::uint>> +GdbIndexBuilder<ELFT>::readCUList() { + std::vector<std::pair<uintX_t, uintX_t>> Ret; + for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf->compile_units()) + Ret.push_back( + {DebugInfoSec->OutSecOff + CU->getOffset(), CU->getLength() + 4}); + return Ret; +} + +template <class ELFT> +std::vector<std::pair<StringRef, uint8_t>> +GdbIndexBuilder<ELFT>::readPubNamesAndTypes() { + const bool IsLE = ELFT::TargetEndianness == llvm::support::little; + StringRef Data[] = {Dwarf->getGnuPubNamesSection(), + Dwarf->getGnuPubTypesSection()}; + + std::vector<std::pair<StringRef, uint8_t>> Ret; + for (StringRef D : Data) { + DWARFDebugPubTable PubTable(D, IsLE, true); + for (const DWARFDebugPubTable::Set &S : PubTable.getData()) + for (const DWARFDebugPubTable::Entry &E : S.Entries) + Ret.push_back({E.Name, E.Descriptor.toBits()}); + } + return Ret; +} + +std::pair<bool, GdbSymbol *> GdbHashTab::add(uint32_t Hash, size_t Offset) { + if (Size * 4 / 3 >= Table.size()) + expand(); + + GdbSymbol **Slot = findSlot(Hash, Offset); + bool New = false; + if (*Slot == nullptr) { + ++Size; + *Slot = new (Alloc) GdbSymbol(Hash, Offset); + New = true; + } + return {New, *Slot}; +} + +void GdbHashTab::expand() { + if 
(Table.empty()) { + Table.resize(InitialSize); + return; + } + std::vector<GdbSymbol *> NewTable(Table.size() * 2); + NewTable.swap(Table); + + for (GdbSymbol *Sym : NewTable) { + if (!Sym) + continue; + GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset); + *Slot = Sym; + } +} + +// Methods finds a slot for symbol with given hash. The step size used to find +// the next candidate slot when handling a hash collision is specified in +// .gdb_index section format. The hash value for a table entry is computed by +// applying an iterative hash function to the symbol's name. +GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) { + uint32_t Index = Hash & (Table.size() - 1); + uint32_t Step = ((Hash * 17) & (Table.size() - 1)) | 1; + + for (;;) { + GdbSymbol *S = Table[Index]; + if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash))) + return &Table[Index]; + Index = (Index + Step) & (Table.size() - 1); + } +} + +template <class ELFT> +static InputSectionBase<ELFT> * +findSection(ArrayRef<InputSectionBase<ELFT> *> Arr, uint64_t Offset) { + for (InputSectionBase<ELFT> *S : Arr) + if (S && S != &InputSection<ELFT>::Discarded) + if (Offset >= S->Offset && Offset < S->Offset + S->getSize()) + return S; + return nullptr; +} + +template <class ELFT> +std::vector<AddressEntry<ELFT>> +GdbIndexBuilder<ELFT>::readAddressArea(size_t CurrentCU) { + std::vector<AddressEntry<ELFT>> Ret; + for (const auto &CU : Dwarf->compile_units()) { + DWARFAddressRangesVector Ranges; + CU->collectAddressRanges(Ranges); + + ArrayRef<InputSectionBase<ELFT> *> Sections = + DebugInfoSec->getFile()->getSections(); + + for (std::pair<uint64_t, uint64_t> &R : Ranges) + if (InputSectionBase<ELFT> *S = findSection(Sections, R.first)) + Ret.push_back( + {S, R.first - S->Offset, R.second - S->Offset, CurrentCU}); + ++CurrentCU; + } + return Ret; +} + +// We return file offset as load address for allocatable sections. 
That is +// currently used for collecting address ranges in readAddressArea(). We are +// able then to find section index that range belongs to. +template <class ELFT> +uint64_t GdbIndexBuilder<ELFT>::getSectionLoadAddress( + const object::SectionRef &Sec) const { + if (static_cast<const ELFSectionRef &>(Sec).getFlags() & ELF::SHF_ALLOC) + return static_cast<const ELFSectionRef &>(Sec).getOffset(); + return 0; +} + +template <class ELFT> +std::unique_ptr<LoadedObjectInfo> GdbIndexBuilder<ELFT>::clone() const { + return {}; +} + +namespace lld { +namespace elf { +template class GdbIndexBuilder<ELF32LE>; +template class GdbIndexBuilder<ELF32BE>; +template class GdbIndexBuilder<ELF64LE>; +template class GdbIndexBuilder<ELF64BE>; +} +} diff --git a/contrib/llvm/tools/lld/ELF/GdbIndex.h b/contrib/llvm/tools/lld/ELF/GdbIndex.h new file mode 100644 index 000000000000..c761ea173a8d --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/GdbIndex.h @@ -0,0 +1,99 @@ +//===- GdbIndex.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// + +#ifndef LLD_ELF_GDB_INDEX_H +#define LLD_ELF_GDB_INDEX_H + +#include "InputFiles.h" +#include "llvm/Object/ELF.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" + +namespace lld { +namespace elf { + +template <class ELFT> class InputSection; + +// Struct represents single entry of address area of gdb index. +template <class ELFT> struct AddressEntry { + InputSectionBase<ELFT> *Section; + uint64_t LowAddress; + uint64_t HighAddress; + size_t CuIndex; +}; + +// GdbIndexBuilder is a helper class used for extracting data required +// for building .gdb_index section from objects. 
+template <class ELFT> class GdbIndexBuilder : public llvm::LoadedObjectInfo { + typedef typename ELFT::uint uintX_t; + + InputSection<ELFT> *DebugInfoSec; + + std::unique_ptr<llvm::DWARFContext> Dwarf; + +public: + GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec); + + // Extracts the compilation units. Each first element of pair is a offset of a + // CU in the .debug_info section and second is the length of that CU. + std::vector<std::pair<uintX_t, uintX_t>> readCUList(); + + // Extracts the vector of address area entries. Accepts global index of last + // parsed CU. + std::vector<AddressEntry<ELFT>> readAddressArea(size_t CurrentCU); + + // Method extracts public names and types. It returns list of name and + // gnu_pub* kind pairs. + std::vector<std::pair<StringRef, uint8_t>> readPubNamesAndTypes(); + +private: + // Method returns section file offset as a load addres for DWARF parser. That + // allows to find the target section index for address ranges. + uint64_t + getSectionLoadAddress(const llvm::object::SectionRef &Sec) const override; + std::unique_ptr<llvm::LoadedObjectInfo> clone() const override; +}; + +// Element of GdbHashTab hash table. +struct GdbSymbol { + GdbSymbol(uint32_t Hash, size_t Offset) + : NameHash(Hash), NameOffset(Offset) {} + uint32_t NameHash; + size_t NameOffset; + size_t CuVectorIndex; +}; + +// This class manages the hashed symbol table for the .gdb_index section. +// The hash value for a table entry is computed by applying an iterative hash +// function to the symbol's name. +class GdbHashTab final { +public: + std::pair<bool, GdbSymbol *> add(uint32_t Hash, size_t Offset); + + size_t getCapacity() { return Table.size(); } + GdbSymbol *getSymbol(size_t I) { return Table[I]; } + +private: + void expand(); + + GdbSymbol **findSlot(uint32_t Hash, size_t Offset); + + llvm::BumpPtrAllocator Alloc; + std::vector<GdbSymbol *> Table; + + // Size keeps the amount of filled entries in Table. 
+ size_t Size = 0; + + // Initial size must be a power of 2. + static const int32_t InitialSize = 1024; +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/ICF.cpp b/contrib/llvm/tools/lld/ELF/ICF.cpp new file mode 100644 index 000000000000..32cd0f8a185c --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/ICF.cpp @@ -0,0 +1,383 @@ +//===- ICF.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// ICF is short for Identical Code Folding. This is a size optimization to +// identify and merge two or more read-only sections (typically functions) +// that happened to have the same contents. It usually reduces output size +// by a few percent. +// +// In ICF, two sections are considered identical if they have the same +// section flags, section data, and relocations. Relocations are tricky, +// because two relocations are considered the same if they have the same +// relocation types, values, and if they point to the same sections *in +// terms of ICF*. +// +// Here is an example. If foo and bar defined below are compiled to the +// same machine instructions, ICF can and should merge the two, although +// their relocations point to each other. +// +// void foo() { bar(); } +// void bar() { foo(); } +// +// If you merge the two, their relocations point to the same section and +// thus you know they are mergeable, but how do you know they are +// mergeable in the first place? This is not an easy problem to solve. +// +// What we are doing in LLD is to partition sections into equivalence +// classes. Sections in the same equivalence class when the algorithm +// terminates are considered identical. Here are details: +// +// 1. First, we partition sections using their hash values as keys. 
Hash +// values are computed from section flags, sizes and numbers of +// relocations. During this step, relocation targets are not taken into +// account. We just put sections that apparently differ into different +// equivalence classes. +// +// 2. Next, for each equivalence class, we visit sections to compare +// relocation targets. Relocation targets are considered equivalent if +// their targets are in the same equivalence class. Sections with +// different relocation targets are put into different equivalence +// classes. +// +// 3. If we split an equivalence class in step 2, two relocations +// that previously targeted the same equivalence class may now target +// different equivalence classes. Therefore, we repeat step 2 until a +// convergence is obtained. +// +// 4. For each equivalence class C, pick an arbitrary section in C, and +// merge all the other sections in C with it. +// +// For small programs, this algorithm needs 3-5 iterations. For large +// programs such as Chromium, it takes more than 20 iterations. +// +// This algorithm was mentioned as an "optimistic algorithm" in [1], +// though gold implements a different algorithm than this. +// +// We parallelize each step so that multiple threads can work on different +// equivalence classes concurrently. That gave us a large performance +// boost when applying ICF on large programs. For example, MSVC link.exe +// or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output +// size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a +// 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still +// faster than MSVC or gold though.
+// +// [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding +// in the Gold Linker +// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf +// +//===----------------------------------------------------------------------===// + +#include "ICF.h" +#include "Config.h" +#include "SymbolTable.h" +#include "Threads.h" + +#include "llvm/ADT/Hashing.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include <algorithm> +#include <atomic> + +using namespace lld; +using namespace lld::elf; +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; + +namespace { +template <class ELFT> class ICF { +public: + void run(); + +private: + void segregate(size_t Begin, size_t End, bool Constant); + + template <class RelTy> + bool constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB); + + template <class RelTy> + bool variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, + const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB); + + bool equalsConstant(const InputSection<ELFT> *A, const InputSection<ELFT> *B); + bool equalsVariable(const InputSection<ELFT> *A, const InputSection<ELFT> *B); + + size_t findBoundary(size_t Begin, size_t End); + + void forEachClassRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn); + + void forEachClass(std::function<void(size_t, size_t)> Fn); + + std::vector<InputSection<ELFT> *> Sections; + + // We repeat the main loop while `Repeat` is true. + std::atomic<bool> Repeat; + + // The main loop counter. + int Cnt = 0; + + // We have two locations for equivalence classes. On the first iteration + // of the main loop, Class[0] has a valid value, and Class[1] contains + // garbage. We read equivalence classes from slot 0 and write to slot 1. + // So, Class[0] represents the current class, and Class[1] represents + // the next class. On each iteration, we switch their roles and use them + // alternately. + // + // Why are we doing this? 
Recall that other threads may be working on + // other equivalence classes in parallel. They may read sections that we + // are updating. We cannot update equivalence classes in place because + // it breaks the invariance that all possibly-identical sections must be + // in the same equivalence class at any moment. In other words, the for + // loop to update equivalence classes is not atomic, and that is + // observable from other threads. By writing new classes to other + // places, we can keep the invariance. + // + // Below, `Current` has the index of the current class, and `Next` has + // the index of the next class. If threading is enabled, they are either + // (0, 1) or (1, 0). + // + // Note on single-thread: if that's the case, they are always (0, 0) + // because we can safely read the next class without worrying about race + // conditions. Using the same location makes this algorithm converge + // faster because it uses results of the same iteration earlier. + int Current = 0; + int Next = 0; +}; +} + +// Returns a hash value for S. Note that the information about +// relocation targets is not included in the hash value. +template <class ELFT> static uint32_t getHash(InputSection<ELFT> *S) { + return hash_combine(S->Flags, S->getSize(), S->NumRelocations); +} + +// Returns true if section S is subject of ICF. +template <class ELFT> static bool isEligible(InputSection<ELFT> *S) { + // .init and .fini contains instructions that must be executed to + // initialize and finalize the process. They cannot and should not + // be merged. + return S->Live && (S->Flags & SHF_ALLOC) && !(S->Flags & SHF_WRITE) && + S->Name != ".init" && S->Name != ".fini"; +} + +// Split an equivalence class into smaller classes. +template <class ELFT> +void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) { + // This loop rearranges sections in [Begin, End) so that all sections + // that are equal in terms of equals{Constant,Variable} are contiguous + // in [Begin, End). 
+ // + // The algorithm is quadratic in the worst case, but that is not an + // issue in practice because the number of the distinct sections in + // each range is usually very small. + + while (Begin < End) { + // Divide [Begin, End) into two. Let Mid be the start index of the + // second group. + auto Bound = std::stable_partition( + Sections.begin() + Begin + 1, Sections.begin() + End, + [&](InputSection<ELFT> *S) { + if (Constant) + return equalsConstant(Sections[Begin], S); + return equalsVariable(Sections[Begin], S); + }); + size_t Mid = Bound - Sections.begin(); + + // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by + // updating the sections in [Begin, End). We use Mid as an equivalence + // class ID because every group ends with a unique index. + for (size_t I = Begin; I < Mid; ++I) + Sections[I]->Class[Next] = Mid; + + // If we created a group, we need to iterate the main loop again. + if (Mid != End) + Repeat = true; + + Begin = Mid; + } +} + +// Compare two lists of relocations. +template <class ELFT> +template <class RelTy> +bool ICF<ELFT>::constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { + auto Eq = [](const RelTy &A, const RelTy &B) { + return A.r_offset == B.r_offset && + A.getType(Config->Mips64EL) == B.getType(Config->Mips64EL) && + getAddend<ELFT>(A) == getAddend<ELFT>(B); + }; + + return RelsA.size() == RelsB.size() && + std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); +} + +// Compare "non-moving" part of two InputSections, namely everything +// except relocation targets. +template <class ELFT> +bool ICF<ELFT>::equalsConstant(const InputSection<ELFT> *A, + const InputSection<ELFT> *B) { + if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags || + A->getSize() != B->getSize() || A->Data != B->Data) + return false; + + if (A->AreRelocsRela) + return constantEq(A->relas(), B->relas()); + return constantEq(A->rels(), B->rels()); +} + +// Compare two lists of relocations. 
Returns true if all pairs of +// relocations point to the same section in terms of ICF. +template <class ELFT> +template <class RelTy> +bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, + const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB) { + auto Eq = [&](const RelTy &RA, const RelTy &RB) { + // The two sections must be identical. + SymbolBody &SA = A->getFile()->getRelocTargetSym(RA); + SymbolBody &SB = B->getFile()->getRelocTargetSym(RB); + if (&SA == &SB) + return true; + + // Or, the two sections must be in the same equivalence class. + auto *DA = dyn_cast<DefinedRegular<ELFT>>(&SA); + auto *DB = dyn_cast<DefinedRegular<ELFT>>(&SB); + if (!DA || !DB) + return false; + if (DA->Value != DB->Value) + return false; + + auto *X = dyn_cast<InputSection<ELFT>>(DA->Section); + auto *Y = dyn_cast<InputSection<ELFT>>(DB->Section); + if (!X || !Y) + return false; + + // Ineligible sections are in the special equivalence class 0. + // They can never be the same in terms of the equivalence class. + if (X->Class[Current] == 0) + return false; + + return X->Class[Current] == Y->Class[Current]; + }; + + return std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); +} + +// Compare "moving" part of two InputSections, namely relocation targets. +template <class ELFT> +bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A, + const InputSection<ELFT> *B) { + if (A->AreRelocsRela) + return variableEq(A, A->relas(), B, B->relas()); + return variableEq(A, A->rels(), B, B->rels()); +} + +template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) { + uint32_t Class = Sections[Begin]->Class[Current]; + for (size_t I = Begin + 1; I < End; ++I) + if (Class != Sections[I]->Class[Current]) + return I; + return End; +} + +// Sections in the same equivalence class are contiguous in Sections +// vector. Therefore, Sections vector can be considered as contiguous +// groups of sections, grouped by the class. 
+// +// This function calls Fn on every group that starts within [Begin, End). +// Note that a group must start in that range but doesn't necessarily +// have to end before End. +template <class ELFT> +void ICF<ELFT>::forEachClassRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn) { + if (Begin > 0) + Begin = findBoundary(Begin - 1, End); + + while (Begin < End) { + size_t Mid = findBoundary(Begin, Sections.size()); + Fn(Begin, Mid); + Begin = Mid; + } +} + +// Call Fn on each equivalence class. +template <class ELFT> +void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) { + // If threading is disabled or the number of sections is + // too small to use threading, call Fn sequentially. + if (!Config->Threads || Sections.size() < 1024) { + forEachClassRange(0, Sections.size(), Fn); + ++Cnt; + return; + } + + Current = Cnt % 2; + Next = (Cnt + 1) % 2; + + // Split sections into 256 shards and call Fn in parallel. + size_t NumShards = 256; + size_t Step = Sections.size() / NumShards; + forLoop(0, NumShards, + [&](size_t I) { forEachClassRange(I * Step, (I + 1) * Step, Fn); }); + forEachClassRange(Step * NumShards, Sections.size(), Fn); + ++Cnt; +} + +// The main function of ICF: hash-partition, refine until stable, then merge. +template <class ELFT> void ICF<ELFT>::run() { + // Collect sections to merge. + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) + if (auto *S = dyn_cast<InputSection<ELFT>>(Sec)) + if (isEligible(S)) + Sections.push_back(S); + + // Initially, we use hash values to partition sections. + for (InputSection<ELFT> *S : Sections) + // Set MSB (via an unsigned shift) to avoid collisions with non-hash IDs. + S->Class[0] = getHash(S) | (1U << 31); + + // From now on, sections in Sections vector are ordered so that sections + // in the same equivalence class are consecutive in the vector.
+ std::stable_sort(Sections.begin(), Sections.end(), + [](InputSection<ELFT> *A, InputSection<ELFT> *B) { + return A->Class[0] < B->Class[0]; + }); + + // Compare static contents and assign unique IDs for each static content. + forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); + + // Split groups by comparing relocations until convergence is obtained. + do { + Repeat = false; + forEachClass( + [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); + } while (Repeat); + + log("ICF needed " + Twine(Cnt) + " iterations"); + + // Merge sections by the equivalence class. + forEachClass([&](size_t Begin, size_t End) { + if (End - Begin == 1) + return; + + log("selected " + Sections[Begin]->Name); + for (size_t I = Begin + 1; I < End; ++I) { + log(" removed " + Sections[I]->Name); + Sections[Begin]->replace(Sections[I]); + } + }); +} + +// ICF entry point function. +template <class ELFT> void elf::doIcf() { ICF<ELFT>().run(); } + +template void elf::doIcf<ELF32LE>(); +template void elf::doIcf<ELF32BE>(); +template void elf::doIcf<ELF64LE>(); +template void elf::doIcf<ELF64BE>(); diff --git a/contrib/llvm/tools/lld/ELF/ICF.h b/contrib/llvm/tools/lld/ELF/ICF.h new file mode 100644 index 000000000000..502e128c8109 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/ICF.h @@ -0,0 +1,19 @@ +//===- ICF.h --------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ICF_H +#define LLD_ELF_ICF_H + +namespace lld { +namespace elf { +template <class ELFT> void doIcf(); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.cpp b/contrib/llvm/tools/lld/ELF/InputFiles.cpp new file mode 100644 index 000000000000..1fddf40f5b22 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/InputFiles.cpp @@ -0,0 +1,986 @@ +//===- InputFiles.cpp -----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" +#include "Error.h" +#include "InputSection.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/LTO/LTO.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/TarWriter.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::sys::fs; + +using namespace lld; +using namespace lld::elf; + +TarWriter *elf::Tar; + +namespace { +// In ELF object file all section addresses are zero. If we have multiple +// .text sections (when using -ffunction-section or comdat group) then +// LLVM DWARF parser will not be able to parse .debug_line correctly, unless +// we assign each section some unique address. This callback method assigns +// each section an address equal to its offset in ELF object file. 
+class ObjectInfo : public LoadedObjectInfo { +public: + uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const override { + return static_cast<const ELFSectionRef &>(Sec).getOffset(); + } + std::unique_ptr<LoadedObjectInfo> clone() const override { + return std::unique_ptr<LoadedObjectInfo>(); + } +}; +} + +Optional<MemoryBufferRef> elf::readFile(StringRef Path) { + if (Config->Verbose) + outs() << Path << "\n"; + + auto MBOrErr = MemoryBuffer::getFile(Path); + if (auto EC = MBOrErr.getError()) { + error(EC, "cannot open " + Path); + return None; + } + std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; + MemoryBufferRef MBRef = MB->getMemBufferRef(); + make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership + + if (Tar) + Tar->append(relativeToRoot(Path), MBRef.getBuffer()); + return MBRef; +} + +template <class ELFT> void elf::ObjectFile<ELFT>::initializeDwarfLine() { + std::unique_ptr<object::ObjectFile> Obj = + check(object::ObjectFile::createObjectFile(this->MB), + "createObjectFile failed"); + + ObjectInfo ObjInfo; + DWARFContextInMemory Dwarf(*Obj, &ObjInfo); + DwarfLine.reset(new DWARFDebugLine(&Dwarf.getLineSection().Relocs)); + DataExtractor LineData(Dwarf.getLineSection().Data, + ELFT::TargetEndianness == support::little, + ELFT::Is64Bits ? 8 : 4); + + // The second parameter is offset in .debug_line section + // for compilation unit (CU) of interest. We have only one + // CU (object file), so offset is always 0. + DwarfLine->getOrParseLineTable(LineData, 0); +} + +// Returns source line information for a given offset +// using DWARF debug info. +template <class ELFT> +std::string elf::ObjectFile<ELFT>::getLineInfo(InputSectionBase<ELFT> *S, + uintX_t Offset) { + if (!DwarfLine) + initializeDwarfLine(); + + // The offset to CU is 0. + const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0); + if (!Tbl) + return ""; + + // Use fake address calculated by adding section file offset and offset in + // section.
See comments for ObjectInfo class. + DILineInfo Info; + Tbl->getFileLineInfoForAddress( + S->Offset + Offset, nullptr, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info); + if (Info.Line == 0) + return ""; + return Info.FileName + ":" + std::to_string(Info.Line); +} + +// Returns "(internal)", "foo.a(bar.o)" or "baz.o". +std::string lld::toString(const InputFile *F) { + if (!F) + return "(internal)"; + if (!F->ArchiveName.empty()) + return (F->ArchiveName + "(" + F->getName() + ")").str(); + return F->getName(); +} + +template <class ELFT> static ELFKind getELFKind() { + if (ELFT::TargetEndianness == support::little) + return ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind; + return ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind; +} + +template <class ELFT> +ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) { + EKind = getELFKind<ELFT>(); + EMachine = getObj().getHeader()->e_machine; + OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; +} + +template <class ELFT> +typename ELFT::SymRange ELFFileBase<ELFT>::getGlobalSymbols() { + return makeArrayRef(Symbols.begin() + FirstNonLocal, Symbols.end()); +} + +template <class ELFT> +uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { + return check(getObj().getSectionIndex(&Sym, Symbols, SymtabSHNDX)); +} + +template <class ELFT> +void ELFFileBase<ELFT>::initSymtab(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr *Symtab) { + FirstNonLocal = Symtab->sh_info; + Symbols = check(getObj().symbols(Symtab)); + if (FirstNonLocal == 0 || FirstNonLocal > Symbols.size()) + fatal(toString(this) + ": invalid sh_info in symbol table"); + + StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections)); +} + +template <class ELFT> +elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M) + : ELFFileBase<ELFT>(Base::ObjectKind, M) {} + +template <class ELFT> +ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getNonLocalSymbols() { + return 
makeArrayRef(this->SymbolBodies).slice(this->FirstNonLocal); +} + +template <class ELFT> +ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getLocalSymbols() { + if (this->SymbolBodies.empty()) + return this->SymbolBodies; + return makeArrayRef(this->SymbolBodies).slice(1, this->FirstNonLocal - 1); +} + +template <class ELFT> +ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getSymbols() { + if (this->SymbolBodies.empty()) + return this->SymbolBodies; + return makeArrayRef(this->SymbolBodies).slice(1); +} + +template <class ELFT> +void elf::ObjectFile<ELFT>::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { + // Read section and symbol tables. + initializeSections(ComdatGroups); + initializeSymbols(); +} + +// Sections with SHT_GROUP and comdat bits define comdat section groups. +// They are identified and deduplicated by group name. This function +// returns a group name. +template <class ELFT> +StringRef +elf::ObjectFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr &Sec) { + if (this->Symbols.empty()) + this->initSymtab(Sections, + check(object::getSection<ELFT>(Sections, Sec.sh_link))); + const Elf_Sym *Sym = + check(object::getSymbol<ELFT>(this->Symbols, Sec.sh_info)); + return check(Sym->getName(this->StringTable)); +} + +template <class ELFT> +ArrayRef<typename elf::ObjectFile<ELFT>::Elf_Word> +elf::ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) { + const ELFFile<ELFT> &Obj = this->getObj(); + ArrayRef<Elf_Word> Entries = + check(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec)); + if (Entries.empty() || Entries[0] != GRP_COMDAT) + fatal(toString(this) + ": unsupported SHT_GROUP format"); + return Entries.slice(1); +} + +template <class ELFT> +bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { + // We don't merge sections if -O0 (default is -O1). This makes sometimes + // the linker significantly faster, although the output will be bigger. 
+ if (Config->Optimize == 0) + return false; + + // Do not merge sections if generating a relocatable object. It makes + // the code simpler because we do not need to update relocation addends + // to reflect changes introduced by merging. Instead of that we write + // such "merge" sections into separate OutputSections and keep SHF_MERGE + // / SHF_STRINGS flags and sh_entsize value to be able to perform merging + // later during a final linking. + if (Config->Relocatable) + return false; + + // A mergeable section with size 0 is useless because they don't have + // any data to merge. A mergeable string section with size 0 can be + // argued as invalid because it doesn't end with a null character. + // We'll avoid a mess by handling them as if they were non-mergeable. + if (Sec.sh_size == 0) + return false; + + // Check for sh_entsize. The ELF spec is not clear about the zero + // sh_entsize. It says that "the member [sh_entsize] contains 0 if + // the section does not hold a table of fixed-size entries". We know + // that Rust 1.13 produces a string mergeable section with a zero + // sh_entsize. Here we just accept it rather than being picky about it. + uintX_t EntSize = Sec.sh_entsize; + if (EntSize == 0) + return false; + if (Sec.sh_size % EntSize) + fatal(toString(this) + + ": SHF_MERGE section size must be a multiple of sh_entsize"); + + uintX_t Flags = Sec.sh_flags; + if (!(Flags & SHF_MERGE)) + return false; + if (Flags & SHF_WRITE) + fatal(toString(this) + ": writable SHF_MERGE section is not supported"); + + // Don't try to merge if the alignment is larger than the sh_entsize and this + // is not SHF_STRINGS. + // + // Since this is not a SHF_STRINGS, we would need to pad after every entity. + // It would be equivalent for the producer of the .o to just set a larger + // sh_entsize. 
+ if (Flags & SHF_STRINGS) + return true; + + return Sec.sh_addralign <= EntSize; +} + +template <class ELFT> +void elf::ObjectFile<ELFT>::initializeSections( + DenseSet<CachedHashStringRef> &ComdatGroups) { + ArrayRef<Elf_Shdr> ObjSections = check(this->getObj().sections()); + const ELFFile<ELFT> &Obj = this->getObj(); + uint64_t Size = ObjSections.size(); + Sections.resize(Size); + unsigned I = -1; + StringRef SectionStringTable = check(Obj.getSectionStringTable(ObjSections)); + for (const Elf_Shdr &Sec : ObjSections) { + ++I; + if (Sections[I] == &InputSection<ELFT>::Discarded) + continue; + + // SHF_EXCLUDE'ed sections are discarded by the linker. However, + // if -r is given, we'll let the final link discard such sections. + // This is compatible with GNU. + if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) { + Sections[I] = &InputSection<ELFT>::Discarded; + continue; + } + + switch (Sec.sh_type) { + case SHT_GROUP: + Sections[I] = &InputSection<ELFT>::Discarded; + if (ComdatGroups.insert(CachedHashStringRef( + getShtGroupSignature(ObjSections, Sec))) + .second) + continue; + for (uint32_t SecIndex : getShtGroupEntries(Sec)) { + if (SecIndex >= Size) + fatal(toString(this) + ": invalid section index in group: " + + Twine(SecIndex)); + Sections[SecIndex] = &InputSection<ELFT>::Discarded; + } + break; + case SHT_SYMTAB: + this->initSymtab(ObjSections, &Sec); + break; + case SHT_SYMTAB_SHNDX: + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, ObjSections)); + break; + case SHT_STRTAB: + case SHT_NULL: + break; + default: + Sections[I] = createInputSection(Sec, SectionStringTable); + } + + // .ARM.exidx sections have a reverse dependency on the InputSection they + // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. 
+ if (Sec.sh_flags & SHF_LINK_ORDER) { + if (Sec.sh_link >= Sections.size()) + fatal(toString(this) + ": invalid sh_link index: " + + Twine(Sec.sh_link)); + auto *IS = cast<InputSection<ELFT>>(Sections[Sec.sh_link]); + IS->DependentSection = Sections[I]; + } + } +} + +template <class ELFT> +InputSectionBase<ELFT> * +elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { + uint32_t Idx = Sec.sh_info; + if (Idx >= Sections.size()) + fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx)); + InputSectionBase<ELFT> *Target = Sections[Idx]; + + // Strictly speaking, a relocation section must be included in the + // group of the section it relocates. However, LLVM 3.3 and earlier + // would fail to do so, so we gracefully handle that case. + if (Target == &InputSection<ELFT>::Discarded) + return nullptr; + + if (!Target) + fatal(toString(this) + ": unsupported relocation reference"); + return Target; +} + +template <class ELFT> +InputSectionBase<ELFT> * +elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec, + StringRef SectionStringTable) { + StringRef Name = + check(this->getObj().getSectionName(&Sec, SectionStringTable)); + + switch (Sec.sh_type) { + case SHT_ARM_ATTRIBUTES: + // FIXME: ARM meta-data section. Retain the first attribute section + // we see. The eglibc ARM dynamic loaders require the presence of an + // attribute section for dlopen to work. + // In a full implementation we would merge all attribute sections. + if (In<ELFT>::ARMAttributes == nullptr) { + In<ELFT>::ARMAttributes = make<InputSection<ELFT>>(this, &Sec, Name); + return In<ELFT>::ARMAttributes; + } + return &InputSection<ELFT>::Discarded; + case SHT_RELA: + case SHT_REL: { + // This section contains relocation information. + // If -r is given, we do not interpret or apply relocation + // but just copy relocation sections to output. 
+ if (Config->Relocatable) + return make<InputSection<ELFT>>(this, &Sec, Name); + + // Find the relocation target section and associate this + // section with it. + InputSectionBase<ELFT> *Target = getRelocTarget(Sec); + if (!Target) + return nullptr; + if (Target->FirstRelocation) + fatal(toString(this) + + ": multiple relocation sections to one section are not supported"); + if (!isa<InputSection<ELFT>>(Target) && !isa<EhInputSection<ELFT>>(Target)) + fatal(toString(this) + + ": relocations pointing to SHF_MERGE are not supported"); + + size_t NumRelocations; + if (Sec.sh_type == SHT_RELA) { + ArrayRef<Elf_Rela> Rels = check(this->getObj().relas(&Sec)); + Target->FirstRelocation = Rels.begin(); + NumRelocations = Rels.size(); + Target->AreRelocsRela = true; + } else { + ArrayRef<Elf_Rel> Rels = check(this->getObj().rels(&Sec)); + Target->FirstRelocation = Rels.begin(); + NumRelocations = Rels.size(); + Target->AreRelocsRela = false; + } + assert(isUInt<31>(NumRelocations)); + Target->NumRelocations = NumRelocations; + return nullptr; + } + } + + // .note.GNU-stack is a marker section to control the presence of + // PT_GNU_STACK segment in outputs. Since the presence of the segment + // is controlled only by the command line option (-z execstack) in LLD, + // .note.GNU-stack is ignored. + if (Name == ".note.GNU-stack") + return &InputSection<ELFT>::Discarded; + + if (Name == ".note.GNU-split-stack") { + error("objects using splitstacks are not supported"); + return &InputSection<ELFT>::Discarded; + } + + if (Config->Strip != StripPolicy::None && Name.startswith(".debug")) + return &InputSection<ELFT>::Discarded; + + // The linkonce feature is a sort of proto-comdat. Some glibc i386 object + // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce + // sections. Drop those sections to avoid duplicate symbol errors. + // FIXME: This is glibc PR20543, we should remove this hack once that has been + // fixed for a while. 
+ if (Name.startswith(".gnu.linkonce.")) + return &InputSection<ELFT>::Discarded; + + // The linker merges EH (exception handling) frames and creates a + // .eh_frame_hdr section for runtime. So we handle them with a special + // class. For relocatable outputs, they are just passed through. + if (Name == ".eh_frame" && !Config->Relocatable) + return make<EhInputSection<ELFT>>(this, &Sec, Name); + + if (shouldMerge(Sec)) + return make<MergeInputSection<ELFT>>(this, &Sec, Name); + return make<InputSection<ELFT>>(this, &Sec, Name); +} + +template <class ELFT> void elf::ObjectFile<ELFT>::initializeSymbols() { + SymbolBodies.reserve(this->Symbols.size()); + for (const Elf_Sym &Sym : this->Symbols) + SymbolBodies.push_back(createSymbolBody(&Sym)); +} + +template <class ELFT> +InputSectionBase<ELFT> * +elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { + uint32_t Index = this->getSectionIndex(Sym); + if (Index >= Sections.size()) + fatal(toString(this) + ": invalid section index: " + Twine(Index)); + InputSectionBase<ELFT> *S = Sections[Index]; + + // We found that GNU assembler 2.17.50 [FreeBSD] 2007-07-03 could + // generate broken objects. STT_SECTION/STT_NOTYPE symbols can be + // associated with SHT_REL[A]/SHT_SYMTAB/SHT_STRTAB sections. + // In this case it is fine for section to be null here as we do not + // allocate sections of these types. 
+ if (!S) { + if (Index == 0 || Sym.getType() == STT_SECTION || + Sym.getType() == STT_NOTYPE) + return nullptr; + fatal(toString(this) + ": invalid section index: " + Twine(Index)); + } + + if (S == &InputSection<ELFT>::Discarded) + return S; + return S->Repl; +} + +template <class ELFT> +SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { + int Binding = Sym->getBinding(); + InputSectionBase<ELFT> *Sec = getSection(*Sym); + + uint8_t StOther = Sym->st_other; + uint8_t Type = Sym->getType(); + uintX_t Value = Sym->st_value; + uintX_t Size = Sym->st_size; + + if (Binding == STB_LOCAL) { + if (Sym->getType() == STT_FILE) + SourceFile = check(Sym->getName(this->StringTable)); + + if (this->StringTable.size() <= Sym->st_name) + fatal(toString(this) + ": invalid symbol name offset"); + + StringRefZ Name = this->StringTable.data() + Sym->st_name; + if (Sym->st_shndx == SHN_UNDEF) + return new (BAlloc) + Undefined<ELFT>(Name, /*IsLocal=*/true, StOther, Type, this); + + return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther, + Type, Value, Size, Sec, this); + } + + StringRef Name = check(Sym->getName(this->StringTable)); + + switch (Sym->st_shndx) { + case SHN_UNDEF: + return elf::Symtab<ELFT>::X + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, + /*CanOmitFromDynSym=*/false, this) + ->body(); + case SHN_COMMON: + if (Value == 0 || Value >= UINT32_MAX) + fatal(toString(this) + ": common symbol '" + Name + + "' has invalid alignment: " + Twine(Value)); + return elf::Symtab<ELFT>::X + ->addCommon(Name, Size, Value, Binding, StOther, Type, this) + ->body(); + } + + switch (Binding) { + default: + fatal(toString(this) + ": unexpected binding: " + Twine(Binding)); + case STB_GLOBAL: + case STB_WEAK: + case STB_GNU_UNIQUE: + if (Sec == &InputSection<ELFT>::Discarded) + return elf::Symtab<ELFT>::X + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, + /*CanOmitFromDynSym=*/false, this) + ->body(); + return 
elf::Symtab<ELFT>::X + ->addRegular(Name, StOther, Type, Value, Size, Binding, Sec, this) + ->body(); + } +} + +template <class ELFT> void ArchiveFile::parse() { + File = check(Archive::create(MB), + MB.getBufferIdentifier() + ": failed to parse archive"); + + // Read the symbol table to construct Lazy objects. + for (const Archive::Symbol &Sym : File->symbols()) + Symtab<ELFT>::X->addLazyArchive(this, Sym); +} + +// Returns a buffer pointing to a member file containing a given symbol. +std::pair<MemoryBufferRef, uint64_t> +ArchiveFile::getMember(const Archive::Symbol *Sym) { + Archive::Child C = + check(Sym->getMember(), + "could not get the member for symbol " + Sym->getName()); + + if (!Seen.insert(C.getChildOffset()).second) + return {MemoryBufferRef(), 0}; + + MemoryBufferRef Ret = + check(C.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + + Sym->getName()); + + if (C.getParent()->isThin() && Tar) + Tar->append(relativeToRoot(check(C.getFullName())), Ret.getBuffer()); + if (C.getParent()->isThin()) + return {Ret, 0}; + return {Ret, C.getChildOffset()}; +} + +template <class ELFT> +SharedFile<ELFT>::SharedFile(MemoryBufferRef M) + : ELFFileBase<ELFT>(Base::SharedKind, M), AsNeeded(Config->AsNeeded) {} + +template <class ELFT> +const typename ELFT::Shdr * +SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const { + return check( + this->getObj().getSection(&Sym, this->Symbols, this->SymtabSHNDX)); +} + +// Partially parse the shared object file so that we can call +// getSoName on this object. 
+template <class ELFT> void SharedFile<ELFT>::parseSoName() { + const Elf_Shdr *DynamicSec = nullptr; + + const ELFFile<ELFT> Obj = this->getObj(); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + for (const Elf_Shdr &Sec : Sections) { + switch (Sec.sh_type) { + default: + continue; + case SHT_DYNSYM: + this->initSymtab(Sections, &Sec); + break; + case SHT_DYNAMIC: + DynamicSec = &Sec; + break; + case SHT_SYMTAB_SHNDX: + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, Sections)); + break; + case SHT_GNU_versym: + this->VersymSec = &Sec; + break; + case SHT_GNU_verdef: + this->VerdefSec = &Sec; + break; + } + } + + if (this->VersymSec && this->Symbols.empty()) + error("SHT_GNU_versym should be associated with symbol table"); + + // DSOs are identified by soname, and they usually contain + // DT_SONAME tag in their header. But if they are missing, + // filenames are used as default sonames. + SoName = sys::path::filename(this->getName()); + + if (!DynamicSec) + return; + + ArrayRef<Elf_Dyn> Arr = + check(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec), + toString(this) + ": getSectionContentsAsArray failed"); + for (const Elf_Dyn &Dyn : Arr) { + if (Dyn.d_tag == DT_SONAME) { + uintX_t Val = Dyn.getVal(); + if (Val >= this->StringTable.size()) + fatal(toString(this) + ": invalid DT_SONAME entry"); + SoName = StringRef(this->StringTable.data() + Val); + return; + } + } +} + +// Parse the version definitions in the object file if present. Returns a vector +// whose nth element contains a pointer to the Elf_Verdef for version identifier +// n. Version identifiers that are not definitions map to nullptr. The array +// always has at least length 1. 
+template <class ELFT> +std::vector<const typename ELFT::Verdef *> +SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) { + std::vector<const Elf_Verdef *> Verdefs(1); + // We only need to process symbol versions for this DSO if it has both a + // versym and a verdef section, which indicates that the DSO contains symbol + // version definitions. + if (!VersymSec || !VerdefSec) + return Verdefs; + + // The location of the first global versym entry. + const char *Base = this->MB.getBuffer().data(); + Versym = reinterpret_cast<const Elf_Versym *>(Base + VersymSec->sh_offset) + + this->FirstNonLocal; + + // We cannot determine the largest verdef identifier without inspecting + // every Elf_Verdef, but both bfd and gold assign verdef identifiers + // sequentially starting from 1, so we predict that the largest identifier + // will be VerdefCount. + unsigned VerdefCount = VerdefSec->sh_info; + Verdefs.resize(VerdefCount + 1); + + // Build the Verdefs array by following the chain of Elf_Verdef objects + // from the start of the .gnu.version_d section. + const char *Verdef = Base + VerdefSec->sh_offset; + for (unsigned I = 0; I != VerdefCount; ++I) { + auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef); + Verdef += CurVerdef->vd_next; + unsigned VerdefIndex = CurVerdef->vd_ndx; + if (Verdefs.size() <= VerdefIndex) + Verdefs.resize(VerdefIndex + 1); + Verdefs[VerdefIndex] = CurVerdef; + } + + return Verdefs; +} + +// Fully parse the shared object file. This must be called after parseSoName(). +template <class ELFT> void SharedFile<ELFT>::parseRest() { + // Create mapping from version identifiers to Elf_Verdef entries. 
+ const Elf_Versym *Versym = nullptr; + std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym); + + Elf_Sym_Range Syms = this->getGlobalSymbols(); + for (const Elf_Sym &Sym : Syms) { + unsigned VersymIndex = 0; + if (Versym) { + VersymIndex = Versym->vs_index; + ++Versym; + } + bool Hidden = VersymIndex & VERSYM_HIDDEN; + VersymIndex = VersymIndex & ~VERSYM_HIDDEN; + + StringRef Name = check(Sym.getName(this->StringTable)); + if (Sym.isUndefined()) { + Undefs.push_back(Name); + continue; + } + + // Ignore local symbols. + if (Versym && VersymIndex == VER_NDX_LOCAL) + continue; + + const Elf_Verdef *V = + VersymIndex == VER_NDX_GLOBAL ? nullptr : Verdefs[VersymIndex]; + + if (!Hidden) + elf::Symtab<ELFT>::X->addShared(this, Name, Sym, V); + + // Also add the symbol with the versioned name to handle undefined symbols + // with explicit versions. + if (V) { + StringRef VerName = this->StringTable.data() + V->getAux()->vda_name; + Name = Saver.save(Twine(Name) + "@" + VerName); + elf::Symtab<ELFT>::X->addShared(this, Name, Sym, V); + } + } +} + +static ELFKind getBitcodeELFKind(MemoryBufferRef MB) { + Triple T(check(getBitcodeTargetTriple(MB))); + if (T.isLittleEndian()) + return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind; + return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind; +} + +static uint8_t getBitcodeMachineKind(MemoryBufferRef MB) { + Triple T(check(getBitcodeTargetTriple(MB))); + switch (T.getArch()) { + case Triple::aarch64: + return EM_AARCH64; + case Triple::arm: + return EM_ARM; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: + case Triple::mips64el: + return EM_MIPS; + case Triple::ppc: + return EM_PPC; + case Triple::ppc64: + return EM_PPC64; + case Triple::x86: + return T.isOSIAMCU() ? 
EM_IAMCU : EM_386; + case Triple::x86_64: + return EM_X86_64; + default: + fatal(MB.getBufferIdentifier() + + ": could not infer e_machine from bitcode target triple " + T.str()); + } +} + +BitcodeFile::BitcodeFile(MemoryBufferRef MB) : InputFile(BitcodeKind, MB) { + EKind = getBitcodeELFKind(MB); + EMachine = getBitcodeMachineKind(MB); +} + +static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { + switch (GvVisibility) { + case GlobalValue::DefaultVisibility: + return STV_DEFAULT; + case GlobalValue::HiddenVisibility: + return STV_HIDDEN; + case GlobalValue::ProtectedVisibility: + return STV_PROTECTED; + } + llvm_unreachable("unknown visibility"); +} + +template <class ELFT> +static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, + const lto::InputFile::Symbol &ObjSym, + BitcodeFile *F) { + StringRef NameRef = Saver.save(ObjSym.getName()); + uint32_t Flags = ObjSym.getFlags(); + uint32_t Binding = (Flags & BasicSymbolRef::SF_Weak) ? STB_WEAK : STB_GLOBAL; + + uint8_t Type = ObjSym.isTLS() ? 
STT_TLS : STT_NOTYPE; + uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); + bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); + + int C = check(ObjSym.getComdatIndex()); + if (C != -1 && !KeptComdats[C]) + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); + + if (Flags & BasicSymbolRef::SF_Undefined) + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); + + if (Flags & BasicSymbolRef::SF_Common) + return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(), + ObjSym.getCommonAlignment(), Binding, + Visibility, STT_OBJECT, F); + + return Symtab<ELFT>::X->addBitcode(NameRef, Binding, Visibility, Type, + CanOmitFromDynSym, F); +} + +template <class ELFT> +void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { + + // Here we pass a new MemoryBufferRef which is identified by ArchiveName + // (the fully resolved path of the archive) + member name + offset of the + // member in the archive. + // ThinLTO uses the MemoryBufferRef identifier to access its internal + // data structures and if two archives define two members with the same name, + // this causes a collision which result in only one of the objects being + // taken into consideration at LTO time (which very likely causes undefined + // symbols later in the link stage). 
+ Obj = check(lto::InputFile::create(MemoryBufferRef( + MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier() + + utostr(OffsetInArchive))))); + + std::vector<bool> KeptComdats; + for (StringRef S : Obj->getComdatTable()) { + StringRef N = Saver.save(S); + KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(N)).second); + } + + for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) + Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this)); +} + +template <template <class> class T> +static InputFile *createELFFile(MemoryBufferRef MB) { + unsigned char Size; + unsigned char Endian; + std::tie(Size, Endian) = getElfArchType(MB.getBuffer()); + if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB) + fatal(MB.getBufferIdentifier() + ": invalid data encoding"); + + size_t BufSize = MB.getBuffer().size(); + if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) || + (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr))) + fatal(MB.getBufferIdentifier() + ": file is too short"); + + InputFile *Obj; + if (Size == ELFCLASS32 && Endian == ELFDATA2LSB) + Obj = make<T<ELF32LE>>(MB); + else if (Size == ELFCLASS32 && Endian == ELFDATA2MSB) + Obj = make<T<ELF32BE>>(MB); + else if (Size == ELFCLASS64 && Endian == ELFDATA2LSB) + Obj = make<T<ELF64LE>>(MB); + else if (Size == ELFCLASS64 && Endian == ELFDATA2MSB) + Obj = make<T<ELF64BE>>(MB); + else + fatal(MB.getBufferIdentifier() + ": invalid file class"); + + if (!Config->FirstElf) + Config->FirstElf = Obj; + return Obj; +} + +template <class ELFT> void BinaryFile::parse() { + StringRef Buf = MB.getBuffer(); + ArrayRef<uint8_t> Data = + makeArrayRef<uint8_t>((const uint8_t *)Buf.data(), Buf.size()); + + std::string Filename = MB.getBufferIdentifier(); + std::transform(Filename.begin(), Filename.end(), Filename.begin(), + [](char C) { return isalnum(C) ? 
C : '_'; }); + Filename = "_binary_" + Filename; + StringRef StartName = Saver.save(Twine(Filename) + "_start"); + StringRef EndName = Saver.save(Twine(Filename) + "_end"); + StringRef SizeName = Saver.save(Twine(Filename) + "_size"); + + auto *Section = + make<InputSection<ELFT>>(SHF_ALLOC, SHT_PROGBITS, 8, Data, ".data"); + Sections.push_back(Section); + + elf::Symtab<ELFT>::X->addRegular(StartName, STV_DEFAULT, STT_OBJECT, 0, 0, + STB_GLOBAL, Section, nullptr); + elf::Symtab<ELFT>::X->addRegular(EndName, STV_DEFAULT, STT_OBJECT, + Data.size(), 0, STB_GLOBAL, Section, + nullptr); + elf::Symtab<ELFT>::X->addRegular(SizeName, STV_DEFAULT, STT_OBJECT, + Data.size(), 0, STB_GLOBAL, nullptr, + nullptr); +} + +static bool isBitcode(MemoryBufferRef MB) { + using namespace sys::fs; + return identify_magic(MB.getBuffer()) == file_magic::bitcode; +} + +InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName, + uint64_t OffsetInArchive) { + InputFile *F = + isBitcode(MB) ? make<BitcodeFile>(MB) : createELFFile<ObjectFile>(MB); + F->ArchiveName = ArchiveName; + F->OffsetInArchive = OffsetInArchive; + return F; +} + +InputFile *elf::createSharedFile(MemoryBufferRef MB) { + return createELFFile<SharedFile>(MB); +} + +MemoryBufferRef LazyObjectFile::getBuffer() { + if (Seen) + return MemoryBufferRef(); + Seen = true; + return MB; +} + +template <class ELFT> void LazyObjectFile::parse() { + for (StringRef Sym : getSymbols()) + Symtab<ELFT>::X->addLazyObject(Sym, *this); +} + +template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() { + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::SymRange Elf_Sym_Range; + + const ELFFile<ELFT> Obj(this->MB.getBuffer()); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + for (const Elf_Shdr &Sec : Sections) { + if (Sec.sh_type != SHT_SYMTAB) + continue; + Elf_Sym_Range Syms = check(Obj.symbols(&Sec)); + uint32_t FirstNonLocal = Sec.sh_info; + 
StringRef StringTable = check(Obj.getStringTableForSymtab(Sec, Sections)); + std::vector<StringRef> V; + for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal)) + if (Sym.st_shndx != SHN_UNDEF) + V.push_back(check(Sym.getName(StringTable))); + return V; + } + return {}; +} + +std::vector<StringRef> LazyObjectFile::getBitcodeSymbols() { + std::unique_ptr<lto::InputFile> Obj = check(lto::InputFile::create(this->MB)); + std::vector<StringRef> V; + for (const lto::InputFile::Symbol &Sym : Obj->symbols()) + if (!(Sym.getFlags() & BasicSymbolRef::SF_Undefined)) + V.push_back(Saver.save(Sym.getName())); + return V; +} + +// Returns a vector of globally-visible defined symbol names. +std::vector<StringRef> LazyObjectFile::getSymbols() { + if (isBitcode(this->MB)) + return getBitcodeSymbols(); + + unsigned char Size; + unsigned char Endian; + std::tie(Size, Endian) = getElfArchType(this->MB.getBuffer()); + if (Size == ELFCLASS32) { + if (Endian == ELFDATA2LSB) + return getElfSymbols<ELF32LE>(); + return getElfSymbols<ELF32BE>(); + } + if (Endian == ELFDATA2LSB) + return getElfSymbols<ELF64LE>(); + return getElfSymbols<ELF64BE>(); +} + +template void ArchiveFile::parse<ELF32LE>(); +template void ArchiveFile::parse<ELF32BE>(); +template void ArchiveFile::parse<ELF64LE>(); +template void ArchiveFile::parse<ELF64BE>(); + +template void BitcodeFile::parse<ELF32LE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF32BE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF64LE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF64BE>(DenseSet<CachedHashStringRef> &); + +template void LazyObjectFile::parse<ELF32LE>(); +template void LazyObjectFile::parse<ELF32BE>(); +template void LazyObjectFile::parse<ELF64LE>(); +template void LazyObjectFile::parse<ELF64BE>(); + +template class elf::ELFFileBase<ELF32LE>; +template class elf::ELFFileBase<ELF32BE>; +template class elf::ELFFileBase<ELF64LE>; +template class 
elf::ELFFileBase<ELF64BE>; + +template class elf::ObjectFile<ELF32LE>; +template class elf::ObjectFile<ELF32BE>; +template class elf::ObjectFile<ELF64LE>; +template class elf::ObjectFile<ELF64BE>; + +template class elf::SharedFile<ELF32LE>; +template class elf::SharedFile<ELF32BE>; +template class elf::SharedFile<ELF64LE>; +template class elf::SharedFile<ELF64BE>; + +template void BinaryFile::parse<ELF32LE>(); +template void BinaryFile::parse<ELF32BE>(); +template void BinaryFile::parse<ELF64LE>(); +template void BinaryFile::parse<ELF64BE>(); diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.h b/contrib/llvm/tools/lld/ELF/InputFiles.h new file mode 100644 index 000000000000..73dda7b566b8 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/InputFiles.h @@ -0,0 +1,342 @@ +//===- InputFiles.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_INPUT_FILES_H +#define LLD_ELF_INPUT_FILES_H + +#include "Config.h" +#include "InputSection.h" +#include "Error.h" +#include "Symbols.h" + +#include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Comdat.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/IRObjectFile.h" + +#include <map> + +namespace llvm { +class DWARFDebugLine; +class TarWriter; +namespace lto { +class InputFile; +} +} + +namespace lld { +namespace elf { +class InputFile; +} + +// Returns "(internal)", "foo.a(bar.o)" or "baz.o". +std::string toString(const elf::InputFile *F); + +namespace elf { + +using llvm::object::Archive; + +class Lazy; +class SymbolBody; + +// If -reproduce option is given, all input files are written +// to this tar archive. 
+extern llvm::TarWriter *Tar; + +// Opens a given file. +llvm::Optional<MemoryBufferRef> readFile(StringRef Path); + +// The root class of input files. +class InputFile { +public: + enum Kind { + ObjectKind, + SharedKind, + LazyObjectKind, + ArchiveKind, + BitcodeKind, + BinaryKind, + }; + + Kind kind() const { return FileKind; } + + StringRef getName() const { return MB.getBufferIdentifier(); } + MemoryBufferRef MB; + + // Filename of .a which contained this file. If this file was + // not in an archive file, it is the empty string. We use this + // string for creating error messages. + StringRef ArchiveName; + + // If this file is in an archive, the member contains the offset of + // the file in the archive. Otherwise, it's just zero. We store this + // field so that we can pass it to lib/LTO in order to disambiguate + // between objects. + uint64_t OffsetInArchive; + + // If this is an architecture-specific file, the following members + // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 
+ ELFKind EKind = ELFNoneKind; + uint16_t EMachine = llvm::ELF::EM_NONE; + uint8_t OSABI = 0; + +protected: + InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} + +private: + const Kind FileKind; +}; + +template <typename ELFT> class ELFFileBase : public InputFile { +public: + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::SymRange Elf_Sym_Range; + + ELFFileBase(Kind K, MemoryBufferRef M); + static bool classof(const InputFile *F) { + Kind K = F->kind(); + return K == ObjectKind || K == SharedKind; + } + + llvm::object::ELFFile<ELFT> getObj() const { + return llvm::object::ELFFile<ELFT>(MB.getBuffer()); + } + + StringRef getStringTable() const { return StringTable; } + + uint32_t getSectionIndex(const Elf_Sym &Sym) const; + + Elf_Sym_Range getGlobalSymbols(); + +protected: + ArrayRef<Elf_Sym> Symbols; + uint32_t FirstNonLocal = 0; + ArrayRef<Elf_Word> SymtabSHNDX; + StringRef StringTable; + void initSymtab(ArrayRef<Elf_Shdr> Sections, const Elf_Shdr *Symtab); +}; + +// .o file. 
+template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> { + typedef ELFFileBase<ELFT> Base; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::SymRange Elf_Sym_Range; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::uint uintX_t; + + StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr &Sec); + ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec); + +public: + static bool classof(const InputFile *F) { + return F->kind() == Base::ObjectKind; + } + + ArrayRef<SymbolBody *> getSymbols(); + ArrayRef<SymbolBody *> getLocalSymbols(); + ArrayRef<SymbolBody *> getNonLocalSymbols(); + + explicit ObjectFile(MemoryBufferRef M); + void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); + + ArrayRef<InputSectionBase<ELFT> *> getSections() const { return Sections; } + InputSectionBase<ELFT> *getSection(const Elf_Sym &Sym) const; + + SymbolBody &getSymbolBody(uint32_t SymbolIndex) const { + if (SymbolIndex >= SymbolBodies.size()) + fatal(toString(this) + ": invalid symbol index"); + return *SymbolBodies[SymbolIndex]; + } + + template <typename RelT> + SymbolBody &getRelocTargetSym(const RelT &Rel) const { + uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL); + return getSymbolBody(SymIndex); + } + + // Returns source line information for a given offset. + // If no information is available, returns "". + std::string getLineInfo(InputSectionBase<ELFT> *S, uintX_t Offset); + + // MIPS GP0 value defined by this file. This value represents the gp value + // used to create the relocatable object and required to support + // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. + uint32_t MipsGp0 = 0; + + // The number is the offset in the string table. It will be used as the + // st_name of the symbol. 
+ std::vector<std::pair<const DefinedRegular<ELFT> *, unsigned>> KeptLocalSyms; + + // Name of source file obtained from STT_FILE symbol value, + // or empty string if there is no such symbol in object file + // symbol table. + StringRef SourceFile; + +private: + void + initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); + void initializeSymbols(); + void initializeDwarfLine(); + InputSectionBase<ELFT> *getRelocTarget(const Elf_Shdr &Sec); + InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec, + StringRef SectionStringTable); + + bool shouldMerge(const Elf_Shdr &Sec); + SymbolBody *createSymbolBody(const Elf_Sym *Sym); + + // List of all sections defined by this file. + std::vector<InputSectionBase<ELFT> *> Sections; + + // List of all symbols referenced or defined by this file. + std::vector<SymbolBody *> SymbolBodies; + + // Debugging information to retrieve source file and line for error + // reporting. Linker may find reasonable number of errors in a + // single object file, so we cache debugging information in order to + // parse it only once for each object file we link. + std::unique_ptr<llvm::DWARFDebugLine> DwarfLine; +}; + +// LazyObjectFile is analogous to ArchiveFile in the sense that +// the file contains lazy symbols. The difference is that +// LazyObjectFile wraps a single file instead of multiple files. +// +// This class is used for --start-lib and --end-lib options which +// instruct the linker to link object files between them with the +// archive file semantics. 
class LazyObjectFile : public InputFile {
public:
  explicit LazyObjectFile(MemoryBufferRef M) : InputFile(LazyObjectKind, M) {}

  static bool classof(const InputFile *F) {
    return F->kind() == LazyObjectKind;
  }

  // Registers every symbol returned by getSymbols() as a lazy symbol.
  template <class ELFT> void parse();

  // Returns the underlying buffer on the first call and an empty
  // MemoryBufferRef on every later call.
  MemoryBufferRef getBuffer();

private:
  // Names of the defined global symbols of the wrapped file; dispatches to
  // the bitcode or the ELF variant depending on the file's magic.
  std::vector<StringRef> getSymbols();
  template <class ELFT> std::vector<StringRef> getElfSymbols();
  std::vector<StringRef> getBitcodeSymbols();

  // Set once getBuffer() has handed out the buffer.
  bool Seen = false;
};

// An ArchiveFile object represents a .a file.
class ArchiveFile : public InputFile {
public:
  explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
  static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }

  // Opens the archive and registers each entry of its symbol table as a lazy
  // symbol.
  template <class ELFT> void parse();

  // Returns a memory buffer for a given symbol and the offset in the archive
  // for the member. An empty memory buffer and an offset of zero
  // is returned if we have already returned the same memory buffer.
  // (So that we don't instantiate same members more than once.)
  std::pair<MemoryBufferRef, uint64_t> getMember(const Archive::Symbol *Sym);

private:
  std::unique_ptr<Archive> File;

  // Child offsets of the members already returned by getMember().
  llvm::DenseSet<uint64_t> Seen;
};

// An LLVM bitcode input file.
class BitcodeFile : public InputFile {
public:
  // Derives EKind and EMachine from the target triple of the bitcode.
  explicit BitcodeFile(MemoryBufferRef M);
  static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }

  // Creates Obj and adds every symbol of the file to the symbol table.
  // ComdatGroups is the set of comdat group names seen so far; it is
  // updated with the groups of this file.
  template <class ELFT>
  void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);
  ArrayRef<Symbol *> getSymbols() { return Symbols; }

  // The lib/LTO view of this file; populated by parse().
  std::unique_ptr<llvm::lto::InputFile> Obj;

private:
  // One entry per symbol of the bitcode file, in the order parse() saw them.
  std::vector<Symbol *> Symbols;
};

// .so file.
+template <class ELFT> class SharedFile : public ELFFileBase<ELFT> { + typedef ELFFileBase<ELFT> Base; + typedef typename ELFT::Dyn Elf_Dyn; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::SymRange Elf_Sym_Range; + typedef typename ELFT::Verdef Elf_Verdef; + typedef typename ELFT::Versym Elf_Versym; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::uint uintX_t; + + std::vector<StringRef> Undefs; + StringRef SoName; + const Elf_Shdr *VersymSec = nullptr; + const Elf_Shdr *VerdefSec = nullptr; + +public: + StringRef getSoName() const { return SoName; } + const Elf_Shdr *getSection(const Elf_Sym &Sym) const; + llvm::ArrayRef<StringRef> getUndefinedSymbols() { return Undefs; } + + static bool classof(const InputFile *F) { + return F->kind() == Base::SharedKind; + } + + explicit SharedFile(MemoryBufferRef M); + + void parseSoName(); + void parseRest(); + std::vector<const Elf_Verdef *> parseVerdefs(const Elf_Versym *&Versym); + + struct NeededVer { + // The string table offset of the version name in the output file. + size_t StrTab; + + // The version identifier for this version name. + uint16_t Index; + }; + + // Mapping from Elf_Verdef data structures to information about Elf_Vernaux + // data structures in the output file. 
+ std::map<const Elf_Verdef *, NeededVer> VerdefMap; + + // Used for --as-needed + bool AsNeeded = false; + bool IsUsed = false; + bool isNeeded() const { return !AsNeeded || IsUsed; } +}; + +class BinaryFile : public InputFile { +public: + explicit BinaryFile(MemoryBufferRef M) : InputFile(BinaryKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == BinaryKind; } + template <class ELFT> void parse(); + ArrayRef<InputSectionData *> getSections() const { return Sections; } + +private: + std::vector<InputSectionData *> Sections; +}; + +InputFile *createObjectFile(MemoryBufferRef MB, StringRef ArchiveName = "", + uint64_t OffsetInArchive = 0); +InputFile *createSharedFile(MemoryBufferRef MB); + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/InputSection.cpp b/contrib/llvm/tools/lld/ELF/InputSection.cpp new file mode 100644 index 000000000000..e87d92aa207c --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/InputSection.cpp @@ -0,0 +1,850 @@ +//===- InputSection.cpp ---------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InputSection.h" +#include "Config.h" +#include "EhFrame.h" +#include "Error.h" +#include "InputFiles.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Relocations.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "Thunks.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/Endian.h" +#include <mutex> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +// Returns a string to construct an error message. 
+template <class ELFT> +std::string lld::toString(const InputSectionBase<ELFT> *Sec) { + return (Sec->getFile()->getName() + ":(" + Sec->Name + ")").str(); +} + +template <class ELFT> +static ArrayRef<uint8_t> getSectionContents(elf::ObjectFile<ELFT> *File, + const typename ELFT::Shdr *Hdr) { + if (!File || Hdr->sh_type == SHT_NOBITS) + return makeArrayRef<uint8_t>(nullptr, Hdr->sh_size); + return check(File->getObj().getSectionContents(Hdr)); +} + +template <class ELFT> +InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, + uintX_t Flags, uint32_t Type, + uintX_t Entsize, uint32_t Link, + uint32_t Info, uintX_t Addralign, + ArrayRef<uint8_t> Data, StringRef Name, + Kind SectionKind) + : InputSectionData(SectionKind, Name, Data, + !Config->GcSections || !(Flags & SHF_ALLOC)), + File(File), Flags(Flags), Entsize(Entsize), Type(Type), Link(Link), + Info(Info), Repl(this) { + NumRelocations = 0; + AreRelocsRela = false; + + // The ELF spec states that a value of 0 means the section has + // no alignment constraits. + uint64_t V = std::max<uint64_t>(Addralign, 1); + if (!isPowerOf2_64(V)) + fatal(toString(File) + ": section sh_addralign is not a power of 2"); + + // We reject object files having insanely large alignments even though + // they are allowed by the spec. I think 4GB is a reasonable limitation. + // We might want to relax this in the future. + if (V > UINT32_MAX) + fatal(toString(File) + ": section sh_addralign is too large"); + Alignment = V; + + // If it is not a mergeable section, overwrite the flag so that the flag + // is consistent with the class. This inconsistency could occur when + // string merging is disabled using -O0 flag. 
+ if (!Config->Relocatable && !isa<MergeInputSection<ELFT>>(this)) + this->Flags &= ~(SHF_MERGE | SHF_STRINGS); +} + +template <class ELFT> +InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, + const Elf_Shdr *Hdr, StringRef Name, + Kind SectionKind) + : InputSectionBase(File, Hdr->sh_flags & ~SHF_INFO_LINK, Hdr->sh_type, + Hdr->sh_entsize, Hdr->sh_link, Hdr->sh_info, + Hdr->sh_addralign, getSectionContents(File, Hdr), Name, + SectionKind) { + this->Offset = Hdr->sh_offset; +} + +template <class ELFT> size_t InputSectionBase<ELFT>::getSize() const { + if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) + return S->getSize(); + + if (auto *D = dyn_cast<InputSection<ELFT>>(this)) + if (D->getThunksSize() > 0) + return D->getThunkOff() + D->getThunksSize(); + + return Data.size(); +} + +// Returns a string for an error message. +template <class SectionT> static std::string getName(SectionT *Sec) { + return (Sec->getFile()->getName() + ":(" + Sec->Name + ")").str(); +} + +template <class ELFT> +typename ELFT::uint InputSectionBase<ELFT>::getOffset(uintX_t Offset) const { + switch (kind()) { + case Regular: + return cast<InputSection<ELFT>>(this)->OutSecOff + Offset; + case Synthetic: + // For synthetic sections we treat offset -1 as the end of the section. + // The same approach is used for synthetic symbols (DefinedSynthetic). + return cast<InputSection<ELFT>>(this)->OutSecOff + + (Offset == uintX_t(-1) ? getSize() : Offset); + case EHFrame: + // The file crtbeginT.o has relocations pointing to the start of an empty + // .eh_frame that is known to be the first in the link. It does that to + // identify the start of the output .eh_frame. 
+ return Offset; + case Merge: + return cast<MergeInputSection<ELFT>>(this)->getOffset(Offset); + } + llvm_unreachable("invalid section kind"); +} + +template <class ELFT> bool InputSectionBase<ELFT>::isCompressed() const { + return (Flags & SHF_COMPRESSED) || Name.startswith(".zdebug"); +} + +// Returns compressed data and its size when uncompressed. +template <class ELFT> +std::pair<ArrayRef<uint8_t>, uint64_t> +InputSectionBase<ELFT>::getElfCompressedData(ArrayRef<uint8_t> Data) { + // Compressed section with Elf_Chdr is the ELF standard. + if (Data.size() < sizeof(Elf_Chdr)) + fatal(toString(this) + ": corrupted compressed section"); + auto *Hdr = reinterpret_cast<const Elf_Chdr *>(Data.data()); + if (Hdr->ch_type != ELFCOMPRESS_ZLIB) + fatal(toString(this) + ": unsupported compression type"); + return {Data.slice(sizeof(*Hdr)), Hdr->ch_size}; +} + +// Returns compressed data and its size when uncompressed. +template <class ELFT> +std::pair<ArrayRef<uint8_t>, uint64_t> +InputSectionBase<ELFT>::getRawCompressedData(ArrayRef<uint8_t> Data) { + // Compressed sections without Elf_Chdr header contain this header + // instead. This is a GNU extension. + struct ZlibHeader { + char Magic[4]; // Should be "ZLIB" + char Size[8]; // Uncompressed size in big-endian + }; + + if (Data.size() < sizeof(ZlibHeader)) + fatal(toString(this) + ": corrupted compressed section"); + auto *Hdr = reinterpret_cast<const ZlibHeader *>(Data.data()); + if (memcmp(Hdr->Magic, "ZLIB", 4)) + fatal(toString(this) + ": broken ZLIB-compressed section"); + return {Data.slice(sizeof(*Hdr)), read64be(Hdr->Size)}; +} + +// Uncompress section contents. Note that this function is called +// from parallel_for_each, so it must be thread-safe. +template <class ELFT> void InputSectionBase<ELFT>::uncompress() { + if (!zlib::isAvailable()) + fatal(toString(this) + + ": build lld with zlib to enable compressed sections support"); + + // This section is compressed. Here we decompress it. 
Ideally, all + // compressed sections have SHF_COMPRESSED bit and their contents + // start with headers of Elf_Chdr type. However, sections whose + // names start with ".zdebug_" don't have the bit and contains a raw + // ZLIB-compressed data (which is a bad thing because section names + // shouldn't be significant in ELF.) We need to be able to read both. + ArrayRef<uint8_t> Buf; // Compressed data + size_t Size; // Uncompressed size + if (Flags & SHF_COMPRESSED) + std::tie(Buf, Size) = getElfCompressedData(Data); + else + std::tie(Buf, Size) = getRawCompressedData(Data); + + // Uncompress Buf. + char *OutputBuf; + { + static std::mutex Mu; + std::lock_guard<std::mutex> Lock(Mu); + OutputBuf = BAlloc.Allocate<char>(Size); + } + if (zlib::uncompress(toStringRef(Buf), OutputBuf, Size) != zlib::StatusOK) + fatal(toString(this) + ": error while uncompressing section"); + Data = ArrayRef<uint8_t>((uint8_t *)OutputBuf, Size); +} + +template <class ELFT> +typename ELFT::uint +InputSectionBase<ELFT>::getOffset(const DefinedRegular<ELFT> &Sym) const { + return getOffset(Sym.Value); +} + +template <class ELFT> +InputSectionBase<ELFT> *InputSectionBase<ELFT>::getLinkOrderDep() const { + if ((Flags & SHF_LINK_ORDER) && Link != 0) + return getFile()->getSections()[Link]; + return nullptr; +} + +// Returns a source location string. Used to construct an error message. +template <class ELFT> +std::string InputSectionBase<ELFT>::getLocation(typename ELFT::uint Offset) { + // First check if we can get desired values from debugging information. + std::string LineInfo = File->getLineInfo(this, Offset); + if (!LineInfo.empty()) + return LineInfo; + + // File->SourceFile contains STT_FILE symbol that contains a + // source file name. If it's missing, we use an object file name. + std::string SrcFile = File->SourceFile; + if (SrcFile.empty()) + SrcFile = toString(File); + + // Find a function symbol that encloses a given location. 
+ for (SymbolBody *B : File->getSymbols()) + if (auto *D = dyn_cast<DefinedRegular<ELFT>>(B)) + if (D->Section == this && D->Type == STT_FUNC) + if (D->Value <= Offset && Offset < D->Value + D->Size) + return SrcFile + ":(function " + toString(*D) + ")"; + + // If there's no symbol, print out the offset in the section. + return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str(); +} + +template <class ELFT> +InputSection<ELFT>::InputSection() : InputSectionBase<ELFT>() {} + +template <class ELFT> +InputSection<ELFT>::InputSection(uintX_t Flags, uint32_t Type, + uintX_t Addralign, ArrayRef<uint8_t> Data, + StringRef Name, Kind K) + : InputSectionBase<ELFT>(nullptr, Flags, Type, + /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Addralign, + Data, Name, K) {} + +template <class ELFT> +InputSection<ELFT>::InputSection(elf::ObjectFile<ELFT> *F, + const Elf_Shdr *Header, StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, Base::Regular) {} + +template <class ELFT> +bool InputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == Base::Regular || S->kind() == Base::Synthetic; +} + +template <class ELFT> +InputSectionBase<ELFT> *InputSection<ELFT>::getRelocatedSection() { + assert(this->Type == SHT_RELA || this->Type == SHT_REL); + ArrayRef<InputSectionBase<ELFT> *> Sections = this->File->getSections(); + return Sections[this->Info]; +} + +template <class ELFT> void InputSection<ELFT>::addThunk(const Thunk<ELFT> *T) { + Thunks.push_back(T); +} + +template <class ELFT> uint64_t InputSection<ELFT>::getThunkOff() const { + return this->Data.size(); +} + +template <class ELFT> uint64_t InputSection<ELFT>::getThunksSize() const { + uint64_t Total = 0; + for (const Thunk<ELFT> *T : Thunks) + Total += T->size(); + return Total; +} + +// This is used for -r. We can't use memcpy to copy relocations because we need +// to update symbol table offset and section index for each relocation. So we +// copy relocations one by one. 
+template <class ELFT> +template <class RelTy> +void InputSection<ELFT>::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) { + InputSectionBase<ELFT> *RelocatedSection = getRelocatedSection(); + + for (const RelTy &Rel : Rels) { + uint32_t Type = Rel.getType(Config->Mips64EL); + SymbolBody &Body = this->File->getRelocTargetSym(Rel); + + Elf_Rela *P = reinterpret_cast<Elf_Rela *>(Buf); + Buf += sizeof(RelTy); + + if (Config->Rela) + P->r_addend = getAddend<ELFT>(Rel); + P->r_offset = RelocatedSection->getOffset(Rel.r_offset); + P->setSymbolAndType(Body.DynsymIndex, Type, Config->Mips64EL); + } +} + +static uint32_t getARMUndefinedRelativeWeakVA(uint32_t Type, uint32_t A, + uint32_t P) { + switch (Type) { + case R_ARM_THM_JUMP11: + return P + 2; + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_PREL31: + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + return P + 4; + case R_ARM_THM_CALL: + // We don't want an interworking BLX to ARM + return P + 5; + default: + return A; + } +} + +static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t Type, uint64_t A, + uint64_t P) { + switch (Type) { + case R_AARCH64_CALL26: + case R_AARCH64_CONDBR19: + case R_AARCH64_JUMP26: + case R_AARCH64_TSTBR14: + return P + 4; + default: + return A; + } +} + +template <class ELFT> +static typename ELFT::uint +getRelocTargetVA(uint32_t Type, typename ELFT::uint A, typename ELFT::uint P, + const SymbolBody &Body, RelExpr Expr) { + switch (Expr) { + case R_HINT: + case R_TLSDESC_CALL: + llvm_unreachable("cannot relocate hint relocs"); + case R_TLSLD: + return In<ELFT>::Got->getTlsIndexOff() + A - In<ELFT>::Got->getSize(); + case R_TLSLD_PC: + return In<ELFT>::Got->getTlsIndexVA() + A - P; + case R_THUNK_ABS: + return Body.getThunkVA<ELFT>() + A; + case R_THUNK_PC: + case R_THUNK_PLT_PC: + return Body.getThunkVA<ELFT>() + A - P; + case R_PPC_TOC: + return getPPC64TocBase() + A; + case R_TLSGD: + return In<ELFT>::Got->getGlobalDynOffset(Body) + A 
- + In<ELFT>::Got->getSize(); + case R_TLSGD_PC: + return In<ELFT>::Got->getGlobalDynAddr(Body) + A - P; + case R_TLSDESC: + return In<ELFT>::Got->getGlobalDynAddr(Body) + A; + case R_TLSDESC_PAGE: + return getAArch64Page(In<ELFT>::Got->getGlobalDynAddr(Body) + A) - + getAArch64Page(P); + case R_PLT: + return Body.getPltVA<ELFT>() + A; + case R_PLT_PC: + case R_PPC_PLT_OPD: + return Body.getPltVA<ELFT>() + A - P; + case R_SIZE: + return Body.getSize<ELFT>() + A; + case R_GOTREL: + return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA(); + case R_GOTREL_FROM_END: + return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA() - + In<ELFT>::Got->getSize(); + case R_RELAX_TLS_GD_TO_IE_END: + case R_GOT_FROM_END: + return Body.getGotOffset<ELFT>() + A - In<ELFT>::Got->getSize(); + case R_RELAX_TLS_GD_TO_IE_ABS: + case R_GOT: + return Body.getGotVA<ELFT>() + A; + case R_RELAX_TLS_GD_TO_IE_PAGE_PC: + case R_GOT_PAGE_PC: + return getAArch64Page(Body.getGotVA<ELFT>() + A) - getAArch64Page(P); + case R_RELAX_TLS_GD_TO_IE: + case R_GOT_PC: + return Body.getGotVA<ELFT>() + A - P; + case R_GOTONLY_PC: + return In<ELFT>::Got->getVA() + A - P; + case R_GOTONLY_PC_FROM_END: + return In<ELFT>::Got->getVA() + A - P + In<ELFT>::Got->getSize(); + case R_RELAX_TLS_LD_TO_LE: + case R_RELAX_TLS_IE_TO_LE: + case R_RELAX_TLS_GD_TO_LE: + case R_TLS: + // A weak undefined TLS symbol resolves to the base of the TLS + // block, i.e. gets a value of zero. If we pass --gc-sections to + // lld and .tbss is not referenced, it gets reclaimed and we don't + // create a TLS program header. Therefore, we resolve this + // statically to zero. 
+ if (Body.isTls() && (Body.isLazy() || Body.isUndefined()) && + Body.symbol()->isWeak()) + return 0; + if (Target->TcbSize) + return Body.getVA<ELFT>(A) + + alignTo(Target->TcbSize, Out<ELFT>::TlsPhdr->p_align); + return Body.getVA<ELFT>(A) - Out<ELFT>::TlsPhdr->p_memsz; + case R_RELAX_TLS_GD_TO_LE_NEG: + case R_NEG_TLS: + return Out<ELF32LE>::TlsPhdr->p_memsz - Body.getVA<ELFT>(A); + case R_ABS: + case R_RELAX_GOT_PC_NOPIC: + return Body.getVA<ELFT>(A); + case R_GOT_OFF: + return Body.getGotOffset<ELFT>() + A; + case R_MIPS_GOT_LOCAL_PAGE: + // If relocation against MIPS local symbol requires GOT entry, this entry + // should be initialized by 'page address'. This address is high 16-bits + // of sum the symbol's value and the addend. + return In<ELFT>::MipsGot->getVA() + + In<ELFT>::MipsGot->getPageEntryOffset(Body, A) - + In<ELFT>::MipsGot->getGp(); + case R_MIPS_GOT_OFF: + case R_MIPS_GOT_OFF32: + // In case of MIPS if a GOT relocation has non-zero addend this addend + // should be applied to the GOT entry content not to the GOT entry offset. + // That is why we use separate expression type. + return In<ELFT>::MipsGot->getVA() + + In<ELFT>::MipsGot->getBodyEntryOffset(Body, A) - + In<ELFT>::MipsGot->getGp(); + case R_MIPS_GOTREL: + return Body.getVA<ELFT>(A) - In<ELFT>::MipsGot->getGp(); + case R_MIPS_TLSGD: + return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + + In<ELFT>::MipsGot->getGlobalDynOffset(Body) - + In<ELFT>::MipsGot->getGp(); + case R_MIPS_TLSLD: + return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + + In<ELFT>::MipsGot->getTlsIndexOff() - In<ELFT>::MipsGot->getGp(); + case R_PPC_OPD: { + uint64_t SymVA = Body.getVA<ELFT>(A); + // If we have an undefined weak symbol, we might get here with a symbol + // address of zero. That could overflow, but the code must be unreachable, + // so don't bother doing anything at all. 
+ if (!SymVA) + return 0; + if (Out<ELF64BE>::Opd) { + // If this is a local call, and we currently have the address of a + // function-descriptor, get the underlying code address instead. + uint64_t OpdStart = Out<ELF64BE>::Opd->Addr; + uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->Size; + bool InOpd = OpdStart <= SymVA && SymVA < OpdEnd; + if (InOpd) + SymVA = read64be(&Out<ELF64BE>::OpdBuf[SymVA - OpdStart]); + } + return SymVA - P; + } + case R_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) { + // On ARM and AArch64 a branch to an undefined weak resolves to the + // next instruction, otherwise the place. + if (Config->EMachine == EM_ARM) + return getARMUndefinedRelativeWeakVA(Type, A, P); + if (Config->EMachine == EM_AARCH64) + return getAArch64UndefinedRelativeWeakVA(Type, A, P); + } + case R_RELAX_GOT_PC: + return Body.getVA<ELFT>(A) - P; + case R_PLT_PAGE_PC: + case R_PAGE_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) + return getAArch64Page(A); + return getAArch64Page(Body.getVA<ELFT>(A)) - getAArch64Page(P); + } + llvm_unreachable("Invalid expression"); +} + +// This function applies relocations to sections without SHF_ALLOC bit. +// Such sections are never mapped to memory at runtime. Debug sections are +// an example. Relocations in non-alloc sections are much easier to +// handle than in allocated sections because it will never need complex +// treatement such as GOT or PLT (because at runtime no one refers them). +// So, we handle relocations for non-alloc sections directly in this +// function as a performance optimization. 
+template <class ELFT> +template <class RelTy> +void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) { + for (const RelTy &Rel : Rels) { + uint32_t Type = Rel.getType(Config->Mips64EL); + uintX_t Offset = this->getOffset(Rel.r_offset); + uint8_t *BufLoc = Buf + Offset; + uintX_t Addend = getAddend<ELFT>(Rel); + if (!RelTy::IsRela) + Addend += Target->getImplicitAddend(BufLoc, Type); + + SymbolBody &Sym = this->File->getRelocTargetSym(Rel); + if (Target->getRelExpr(Type, Sym) != R_ABS) { + error(this->getLocation(Offset) + ": has non-ABS reloc"); + return; + } + + uintX_t AddrLoc = this->OutSec->Addr + Offset; + uint64_t SymVA = 0; + if (!Sym.isTls() || Out<ELFT>::TlsPhdr) + SymVA = SignExtend64<sizeof(uintX_t) * 8>( + getRelocTargetVA<ELFT>(Type, Addend, AddrLoc, Sym, R_ABS)); + Target->relocateOne(BufLoc, Type, SymVA); + } +} + +template <class ELFT> +void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd) { + // scanReloc function in Writer.cpp constructs Relocations + // vector only for SHF_ALLOC'ed sections. For other sections, + // we handle relocations directly here. 
+ auto *IS = dyn_cast<InputSection<ELFT>>(this); + if (IS && !(IS->Flags & SHF_ALLOC)) { + if (IS->AreRelocsRela) + IS->relocateNonAlloc(Buf, IS->relas()); + else + IS->relocateNonAlloc(Buf, IS->rels()); + return; + } + + const unsigned Bits = sizeof(uintX_t) * 8; + for (const Relocation &Rel : Relocations) { + uintX_t Offset = getOffset(Rel.Offset); + uint8_t *BufLoc = Buf + Offset; + uint32_t Type = Rel.Type; + uintX_t A = Rel.Addend; + + uintX_t AddrLoc = OutSec->Addr + Offset; + RelExpr Expr = Rel.Expr; + uint64_t TargetVA = SignExtend64<Bits>( + getRelocTargetVA<ELFT>(Type, A, AddrLoc, *Rel.Sym, Expr)); + + switch (Expr) { + case R_RELAX_GOT_PC: + case R_RELAX_GOT_PC_NOPIC: + Target->relaxGot(BufLoc, TargetVA); + break; + case R_RELAX_TLS_IE_TO_LE: + Target->relaxTlsIeToLe(BufLoc, Type, TargetVA); + break; + case R_RELAX_TLS_LD_TO_LE: + Target->relaxTlsLdToLe(BufLoc, Type, TargetVA); + break; + case R_RELAX_TLS_GD_TO_LE: + case R_RELAX_TLS_GD_TO_LE_NEG: + Target->relaxTlsGdToLe(BufLoc, Type, TargetVA); + break; + case R_RELAX_TLS_GD_TO_IE: + case R_RELAX_TLS_GD_TO_IE_ABS: + case R_RELAX_TLS_GD_TO_IE_PAGE_PC: + case R_RELAX_TLS_GD_TO_IE_END: + Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); + break; + case R_PPC_PLT_OPD: + // Patch a nop (0x60000000) to a ld. + if (BufLoc + 8 <= BufEnd && read32be(BufLoc + 4) == 0x60000000) + write32be(BufLoc + 4, 0xe8410028); // ld %r2, 40(%r1) + // fallthrough + default: + Target->relocateOne(BufLoc, Type, TargetVA); + break; + } + } +} + +template <class ELFT> void InputSection<ELFT>::writeTo(uint8_t *Buf) { + if (this->Type == SHT_NOBITS) + return; + + if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) { + S->writeTo(Buf + OutSecOff); + return; + } + + // If -r is given, then an InputSection may be a relocation section. 
+ if (this->Type == SHT_RELA) { + copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rela>()); + return; + } + if (this->Type == SHT_REL) { + copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rel>()); + return; + } + + // Copy section contents from source object file to output file. + ArrayRef<uint8_t> Data = this->Data; + memcpy(Buf + OutSecOff, Data.data(), Data.size()); + + // Iterate over all relocation sections that apply to this section. + uint8_t *BufEnd = Buf + OutSecOff + Data.size(); + this->relocate(Buf, BufEnd); + + // The section might have a data/code generated by the linker and need + // to be written after the section. Usually these are thunks - small piece + // of code used to jump between "incompatible" functions like PIC and non-PIC + // or if the jump target too far and its address does not fit to the short + // jump istruction. + if (!Thunks.empty()) { + Buf += OutSecOff + getThunkOff(); + for (const Thunk<ELFT> *T : Thunks) { + T->writeTo(Buf); + Buf += T->size(); + } + } +} + +template <class ELFT> +void InputSection<ELFT>::replace(InputSection<ELFT> *Other) { + this->Alignment = std::max(this->Alignment, Other->Alignment); + Other->Repl = this->Repl; + Other->Live = false; +} + +template <class ELFT> +EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F, + const Elf_Shdr *Header, StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::EHFrame) { + // Mark .eh_frame sections as live by default because there are + // usually no relocations that point to .eh_frames. Otherwise, + // the garbage collector would drop all .eh_frame sections. + this->Live = true; +} + +template <class ELFT> +bool EhInputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == InputSectionBase<ELFT>::EHFrame; +} + +// Returns the index of the first relocation that points to a region between +// Begin and Begin+Size. 
+template <class IntTy, class RelTy> +static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels, + unsigned &RelocI) { + // Start search from RelocI for fast access. That works because the + // relocations are sorted in .eh_frame. + for (unsigned N = Rels.size(); RelocI < N; ++RelocI) { + const RelTy &Rel = Rels[RelocI]; + if (Rel.r_offset < Begin) + continue; + + if (Rel.r_offset < Begin + Size) + return RelocI; + return -1; + } + return -1; +} + +// .eh_frame is a sequence of CIE or FDE records. +// This function splits an input section into records and returns them. +template <class ELFT> void EhInputSection<ELFT>::split() { + // Early exit if already split. + if (!this->Pieces.empty()) + return; + + if (this->NumRelocations) { + if (this->AreRelocsRela) + split(this->relas()); + else + split(this->rels()); + return; + } + split(makeArrayRef<typename ELFT::Rela>(nullptr, nullptr)); +} + +template <class ELFT> +template <class RelTy> +void EhInputSection<ELFT>::split(ArrayRef<RelTy> Rels) { + ArrayRef<uint8_t> Data = this->Data; + unsigned RelI = 0; + for (size_t Off = 0, End = Data.size(); Off != End;) { + size_t Size = readEhRecordSize<ELFT>(this, Off); + this->Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI)); + // The empty record is the end marker. + if (Size == 4) + break; + Off += Size; + } +} + +static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) { + // Optimize the common case. + StringRef S((const char *)A.data(), A.size()); + if (EntSize == 1) + return S.find(0); + + for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { + const char *B = S.begin() + I; + if (std::all_of(B, B + EntSize, [](char C) { return C == 0; })) + return I; + } + return StringRef::npos; +} + +// Split SHF_STRINGS section. Such section is a sequence of +// null-terminated strings. 
+template <class ELFT> +void MergeInputSection<ELFT>::splitStrings(ArrayRef<uint8_t> Data, + size_t EntSize) { + size_t Off = 0; + bool IsAlloc = this->Flags & SHF_ALLOC; + while (!Data.empty()) { + size_t End = findNull(Data, EntSize); + if (End == StringRef::npos) + fatal(toString(this) + ": string is not null terminated"); + size_t Size = End + EntSize; + Pieces.emplace_back(Off, !IsAlloc); + Hashes.push_back(hash_value(toStringRef(Data.slice(0, Size)))); + Data = Data.slice(Size); + Off += Size; + } +} + +// Split non-SHF_STRINGS section. Such section is a sequence of +// fixed size records. +template <class ELFT> +void MergeInputSection<ELFT>::splitNonStrings(ArrayRef<uint8_t> Data, + size_t EntSize) { + size_t Size = Data.size(); + assert((Size % EntSize) == 0); + bool IsAlloc = this->Flags & SHF_ALLOC; + for (unsigned I = 0, N = Size; I != N; I += EntSize) { + Hashes.push_back(hash_value(toStringRef(Data.slice(I, EntSize)))); + Pieces.emplace_back(I, !IsAlloc); + } +} + +template <class ELFT> +MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F, + const Elf_Shdr *Header, + StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::Merge) {} + +// This function is called after we obtain a complete list of input sections +// that need to be linked. This is responsible to split section contents +// into small chunks for further processing. +// +// Note that this function is called from parallel_for_each. This must be +// thread-safe (i.e. no memory allocation from the pools). 
+template <class ELFT> void MergeInputSection<ELFT>::splitIntoPieces() { + ArrayRef<uint8_t> Data = this->Data; + uintX_t EntSize = this->Entsize; + if (this->Flags & SHF_STRINGS) + splitStrings(Data, EntSize); + else + splitNonStrings(Data, EntSize); + + if (Config->GcSections && (this->Flags & SHF_ALLOC)) + for (uintX_t Off : LiveOffsets) + this->getSectionPiece(Off)->Live = true; +} + +template <class ELFT> +bool MergeInputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == InputSectionBase<ELFT>::Merge; +} + +// Do binary search to get a section piece at a given input offset. +template <class ELFT> +SectionPiece *MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) { + auto *This = static_cast<const MergeInputSection<ELFT> *>(this); + return const_cast<SectionPiece *>(This->getSectionPiece(Offset)); +} + +template <class It, class T, class Compare> +static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) { + size_t Size = std::distance(First, Last); + assert(Size != 0); + while (Size != 1) { + size_t H = Size / 2; + const It MI = First + H; + Size -= H; + First = Comp(Value, *MI) ? First : First + H; + } + return Comp(Value, *First) ? First : First + 1; +} + +template <class ELFT> +const SectionPiece * +MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { + uintX_t Size = this->Data.size(); + if (Offset >= Size) + fatal(toString(this) + ": entry is past the end of the section"); + + // Find the element this offset points to. + auto I = fastUpperBound( + Pieces.begin(), Pieces.end(), Offset, + [](const uintX_t &A, const SectionPiece &B) { return A < B.InputOff; }); + --I; + return &*I; +} + +// Returns the offset in an output section for a given input offset. +// Because contents of a mergeable section is not contiguous in output, +// it is not just an addition to a base output offset. 
+template <class ELFT> +typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) const { + // Initialize OffsetMap lazily. + std::call_once(InitOffsetMap, [&] { + OffsetMap.reserve(Pieces.size()); + for (const SectionPiece &Piece : Pieces) + OffsetMap[Piece.InputOff] = Piece.OutputOff; + }); + + // Find a string starting at a given offset. + auto It = OffsetMap.find(Offset); + if (It != OffsetMap.end()) + return It->second; + + if (!this->Live) + return 0; + + // If Offset is not at beginning of a section piece, it is not in the map. + // In that case we need to search from the original section piece vector. + const SectionPiece &Piece = *this->getSectionPiece(Offset); + if (!Piece.Live) + return 0; + + uintX_t Addend = Offset - Piece.InputOff; + return Piece.OutputOff + Addend; +} + +template class elf::InputSectionBase<ELF32LE>; +template class elf::InputSectionBase<ELF32BE>; +template class elf::InputSectionBase<ELF64LE>; +template class elf::InputSectionBase<ELF64BE>; + +template class elf::InputSection<ELF32LE>; +template class elf::InputSection<ELF32BE>; +template class elf::InputSection<ELF64LE>; +template class elf::InputSection<ELF64BE>; + +template class elf::EhInputSection<ELF32LE>; +template class elf::EhInputSection<ELF32BE>; +template class elf::EhInputSection<ELF64LE>; +template class elf::EhInputSection<ELF64BE>; + +template class elf::MergeInputSection<ELF32LE>; +template class elf::MergeInputSection<ELF32BE>; +template class elf::MergeInputSection<ELF64LE>; +template class elf::MergeInputSection<ELF64BE>; + +template std::string lld::toString(const InputSectionBase<ELF32LE> *); +template std::string lld::toString(const InputSectionBase<ELF32BE> *); +template std::string lld::toString(const InputSectionBase<ELF64LE> *); +template std::string lld::toString(const InputSectionBase<ELF64BE> *); diff --git a/contrib/llvm/tools/lld/ELF/InputSection.h b/contrib/llvm/tools/lld/ELF/InputSection.h new file mode 100644 index 
000000000000..fc7a7fb60973 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/InputSection.h @@ -0,0 +1,326 @@ +//===- InputSection.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_INPUT_SECTION_H +#define LLD_ELF_INPUT_SECTION_H + +#include "Config.h" +#include "Relocations.h" +#include "Thunks.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Object/ELF.h" +#include <mutex> + +namespace lld { +namespace elf { + +class DefinedCommon; +class SymbolBody; +struct SectionPiece; + +template <class ELFT> class DefinedRegular; +template <class ELFT> class ObjectFile; +template <class ELFT> class OutputSection; +class OutputSectionBase; + +// We need non-template input section class to store symbol layout +// in linker script parser structures, where we do not have ELFT +// template parameter. For each scripted output section symbol we +// store pointer to preceding InputSectionData object or nullptr, +// if symbol should be placed at the very beginning of the output +// section +class InputSectionData { +public: + enum Kind { Regular, EHFrame, Merge, Synthetic, }; + + // The garbage collector sets sections' Live bits. + // If GC is disabled, all sections are considered live by default. 
+ InputSectionData(Kind SectionKind, StringRef Name, ArrayRef<uint8_t> Data, + bool Live) + : SectionKind(SectionKind), Live(Live), Assigned(false), Name(Name), + Data(Data) {} + +private: + unsigned SectionKind : 3; + +public: + Kind kind() const { return (Kind)SectionKind; } + + unsigned Live : 1; // for garbage collection + unsigned Assigned : 1; // for linker script + uint32_t Alignment; + StringRef Name; + ArrayRef<uint8_t> Data; + + template <typename T> llvm::ArrayRef<T> getDataAs() const { + size_t S = Data.size(); + assert(S % sizeof(T) == 0); + return llvm::makeArrayRef<T>((const T *)Data.data(), S / sizeof(T)); + } + + std::vector<Relocation> Relocations; +}; + +// This corresponds to a section of an input file. +template <class ELFT> class InputSectionBase : public InputSectionData { +protected: + typedef typename ELFT::Chdr Elf_Chdr; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::uint uintX_t; + + // The file this section is from. + ObjectFile<ELFT> *File; + +public: + // These corresponds to the fields in Elf_Shdr. + uintX_t Flags; + uintX_t Offset = 0; + uintX_t Entsize; + uint32_t Type; + uint32_t Link; + uint32_t Info; + + InputSectionBase() + : InputSectionData(Regular, "", ArrayRef<uint8_t>(), false), Repl(this) { + NumRelocations = 0; + AreRelocsRela = false; + } + + InputSectionBase(ObjectFile<ELFT> *File, const Elf_Shdr *Header, + StringRef Name, Kind SectionKind); + InputSectionBase(ObjectFile<ELFT> *File, uintX_t Flags, uint32_t Type, + uintX_t Entsize, uint32_t Link, uint32_t Info, + uintX_t Addralign, ArrayRef<uint8_t> Data, StringRef Name, + Kind SectionKind); + OutputSectionBase *OutSec = nullptr; + + // Relocations that refer to this section. 
+ const Elf_Rel *FirstRelocation = nullptr; + unsigned NumRelocations : 31; + unsigned AreRelocsRela : 1; + ArrayRef<Elf_Rel> rels() const { + assert(!AreRelocsRela); + return llvm::makeArrayRef(FirstRelocation, NumRelocations); + } + ArrayRef<Elf_Rela> relas() const { + assert(AreRelocsRela); + return llvm::makeArrayRef(static_cast<const Elf_Rela *>(FirstRelocation), + NumRelocations); + } + + // This pointer points to the "real" instance of this instance. + // Usually Repl == this. However, if ICF merges two sections, + // Repl pointer of one section points to another section. So, + // if you need to get a pointer to this instance, do not use + // this but instead this->Repl. + InputSectionBase<ELFT> *Repl; + + // Returns the size of this section (even if this is a common or BSS.) + size_t getSize() const; + + ObjectFile<ELFT> *getFile() const { return File; } + llvm::object::ELFFile<ELFT> getObj() const { return File->getObj(); } + uintX_t getOffset(const DefinedRegular<ELFT> &Sym) const; + InputSectionBase *getLinkOrderDep() const; + // Translate an offset in the input section to an offset in the output + // section. + uintX_t getOffset(uintX_t Offset) const; + + // ELF supports ZLIB-compressed section. + // Returns true if the section is compressed. + bool isCompressed() const; + void uncompress(); + + // Returns a source location string. Used to construct an error message. + std::string getLocation(uintX_t Offset); + + void relocate(uint8_t *Buf, uint8_t *BufEnd); + +private: + std::pair<ArrayRef<uint8_t>, uint64_t> + getElfCompressedData(ArrayRef<uint8_t> Data); + + std::pair<ArrayRef<uint8_t>, uint64_t> + getRawCompressedData(ArrayRef<uint8_t> Data); +}; + +// SectionPiece represents a piece of splittable section contents. +// We allocate a lot of these and binary search on them. This means that they +// have to be as compact as possible, which is why we don't store the size (can +// be found by looking at the next one) and put the hash in a side table. 
+struct SectionPiece { + SectionPiece(size_t Off, bool Live = false) + : InputOff(Off), OutputOff(-1), Live(Live || !Config->GcSections) {} + + size_t InputOff; + ssize_t OutputOff : 8 * sizeof(ssize_t) - 1; + size_t Live : 1; +}; +static_assert(sizeof(SectionPiece) == 2 * sizeof(size_t), + "SectionPiece is too big"); + +// This corresponds to a SHF_MERGE section of an input file. +template <class ELFT> class MergeInputSection : public InputSectionBase<ELFT> { + typedef typename ELFT::uint uintX_t; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Shdr Elf_Shdr; + +public: + MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, + StringRef Name); + static bool classof(const InputSectionData *S); + void splitIntoPieces(); + + // Mark the piece at a given offset live. Used by GC. + void markLiveAt(uintX_t Offset) { + assert(this->Flags & llvm::ELF::SHF_ALLOC); + LiveOffsets.insert(Offset); + } + + // Translate an offset in the input section to an offset + // in the output section. + uintX_t getOffset(uintX_t Offset) const; + + // Splittable sections are handled as a sequence of data + // rather than a single large blob of data. + std::vector<SectionPiece> Pieces; + + // Returns I'th piece's data. This function is very hot when + // string merging is enabled, so we want to inline. + LLVM_ATTRIBUTE_ALWAYS_INLINE + llvm::CachedHashStringRef getData(size_t I) const { + size_t Begin = Pieces[I].InputOff; + size_t End; + if (Pieces.size() - 1 == I) + End = this->Data.size(); + else + End = Pieces[I + 1].InputOff; + + StringRef S = {(const char *)(this->Data.data() + Begin), End - Begin}; + return {S, Hashes[I]}; + } + + // Returns the SectionPiece at a given input section offset. 
+ SectionPiece *getSectionPiece(uintX_t Offset); + const SectionPiece *getSectionPiece(uintX_t Offset) const; + +private: + void splitStrings(ArrayRef<uint8_t> A, size_t Size); + void splitNonStrings(ArrayRef<uint8_t> A, size_t Size); + + std::vector<uint32_t> Hashes; + + mutable llvm::DenseMap<uintX_t, uintX_t> OffsetMap; + mutable std::once_flag InitOffsetMap; + + llvm::DenseSet<uintX_t> LiveOffsets; +}; + +struct EhSectionPiece : public SectionPiece { + EhSectionPiece(size_t Off, InputSectionData *ID, uint32_t Size, + unsigned FirstRelocation) + : SectionPiece(Off, false), ID(ID), Size(Size), + FirstRelocation(FirstRelocation) {} + InputSectionData *ID; + uint32_t Size; + uint32_t size() const { return Size; } + + ArrayRef<uint8_t> data() { return {ID->Data.data() + this->InputOff, Size}; } + unsigned FirstRelocation; +}; + +// This corresponds to a .eh_frame section of an input file. +template <class ELFT> class EhInputSection : public InputSectionBase<ELFT> { +public: + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::uint uintX_t; + EhInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name); + static bool classof(const InputSectionData *S); + void split(); + template <class RelTy> void split(ArrayRef<RelTy> Rels); + + // Splittable sections are handled as a sequence of data + // rather than a single large blob of data. + std::vector<EhSectionPiece> Pieces; +}; + +// This corresponds to a non SHF_MERGE section of an input file. 
+template <class ELFT> class InputSection : public InputSectionBase<ELFT> {
+  typedef InputSectionBase<ELFT> Base;
+  typedef typename ELFT::Shdr Elf_Shdr;
+  typedef typename ELFT::Rela Elf_Rela;
+  typedef typename ELFT::Rel Elf_Rel;
+  typedef typename ELFT::Sym Elf_Sym;
+  typedef typename ELFT::uint uintX_t;
+  typedef InputSectionData::Kind Kind;
+
+public:
+  InputSection();
+  InputSection(uintX_t Flags, uint32_t Type, uintX_t Addralign,
+               ArrayRef<uint8_t> Data, StringRef Name,
+               Kind K = InputSectionData::Regular);
+  InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name);
+
+  static InputSection<ELFT> Discarded;
+
+  // Write this section to a mmap'ed file, assuming Buf is pointing to
+  // beginning of the output section.
+  void writeTo(uint8_t *Buf);
+
+  // The offset from the beginning of the output section this section was
+  // assigned to. The writer sets a value.
+  uint64_t OutSecOff = 0;
+
+  // InputSection that is dependent on us (reverse dependency for GC).
+  InputSectionBase<ELFT> *DependentSection = nullptr;
+
+  static bool classof(const InputSectionData *S);
+
+  InputSectionBase<ELFT> *getRelocatedSection();
+
+  // Register a thunk related to the symbol. When the section is written
+  // to a mmap'ed file, the target is requested to write the actual thunk code.
+  // Currently thunks are supported for MIPS and ARM targets only.
+  void addThunk(const Thunk<ELFT> *T);
+
+  // The offset of synthetic thunk code from beginning of this section.
+  uint64_t getThunkOff() const;
+
+  // Size of chunk with thunks code.
+  uint64_t getThunksSize() const;
+
+  template <class RelTy>
+  void relocateNonAlloc(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);
+
+  // Used by ICF.
+  uint32_t Class[2] = {0, 0};
+
+  // Called by ICF to merge two input sections.
+  void replace(InputSection<ELFT> *Other);
+
+private:
+  template <class RelTy>
+  void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);
+
+  // Thunks registered through addThunk().
+  llvm::TinyPtrVector<const Thunk<ELFT> *> Thunks;
+};
+
+template <class ELFT> InputSection<ELFT> InputSection<ELFT>::Discarded;
+} // namespace elf
+
+template <class ELFT> std::string toString(const elf::InputSectionBase<ELFT> *);
+} // namespace lld
+
+#endif
diff --git a/contrib/llvm/tools/lld/ELF/LTO.cpp b/contrib/llvm/tools/lld/ELF/LTO.cpp
new file mode 100644
index 000000000000..b342b6195f1d
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/LTO.cpp
@@ -0,0 +1,164 @@
+//===- LTO.cpp ------------------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LTO.h"
+#include "Config.h"
+#include "Error.h"
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/LTO/Config.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+
+using namespace lld;
+using namespace lld::elf;
+
+// This is for use when debugging LTO.
+// Writes Buffer to a newly created file at Path. If the file cannot be
+// created, the error is reported and nothing is written.
+static void saveBuffer(StringRef Buffer, const Twine &Path) {
+  std::error_code EC;
+  raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None);
+  if (EC) {
+    // error() returns to the caller, so bail out explicitly: the stream has
+    // no valid file behind it and must not be written to.
+    error(EC, "cannot create " + Path);
+    return;
+  }
+  OS << Buffer;
+}
+
+// Renders an LLVM diagnostic as text and forwards it as a linker warning.
+// No severity check is made: every diagnostic is reported through warn().
+static void diagnosticHandler(const DiagnosticInfo &DI) {
+  SmallString<128> ErrStorage;
+  raw_svector_ostream OS(ErrStorage);
+  DiagnosticPrinterRawOStream DP(OS);
+  DI.print(DP);
+  warn(ErrStorage);
+}
+
+// Reports each error contained in E through error() and marks it as handled.
+static void checkError(Error E) {
+  handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) -> Error {
+    error(EIB.message());
+    return Error::success();
+  });
+}
+
+// Creates the lto::LTO object, configured from the global Config.
+static std::unique_ptr<lto::LTO> createLTO() {
+  lto::Config Conf;
+
+  // LLD supports the new relocations.
+  Conf.Options = InitTargetOptionsFromCodeGenFlags();
+  Conf.Options.RelaxELFRelocations = true;
+
+  Conf.RelocModel = Config->Pic ? Reloc::PIC_ : Reloc::Static;
+  Conf.DisableVerify = Config->DisableVerify;
+  Conf.DiagHandler = diagnosticHandler;
+  Conf.OptLevel = Config->LTOO;
+
+  // Set up a custom pipeline if we've been asked to.
+  Conf.OptPipeline = Config->LTONewPmPasses;
+  Conf.AAPipeline = Config->LTOAAPipeline;
+
+  if (Config->SaveTemps)
+    checkError(Conf.addSaveTemps(std::string(Config->OutputFile) + ".",
+                                 /*UseInputModulePath*/ true));
+
+  // NOTE(review): -1u is presumably the "no job count requested" sentinel; in
+  // that case the default-constructed backend is passed on unchanged.
+  lto::ThinBackend Backend;
+  if (Config->ThinLTOJobs != -1u)
+    Backend = lto::createInProcessThinBackend(Config->ThinLTOJobs);
+  return llvm::make_unique<lto::LTO>(std::move(Conf), Backend,
+                                     Config->LTOPartitions);
+}
+
+BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {}
+
+BitcodeCompiler::~BitcodeCompiler() = default;
+
+// Replaces the body of S with an Undefined symbol that keeps the name and
+// type of the current body.
+template <class ELFT> static void undefine(Symbol *S) {
+  replaceBody<Undefined<ELFT>>(S, S->body()->getName(), /*IsLocal=*/false,
+                               STV_DEFAULT, S->body()->Type, nullptr);
+}
+
+// Hands bitcode file F over to the LTO object together with one resolution
+// per symbol. F.Obj is moved from, so F no longer owns its lto::InputFile
+// afterwards.
+template <class ELFT> void BitcodeCompiler::add(BitcodeFile &F) {
+  lto::InputFile &Obj = *F.Obj;
+  unsigned SymNum = 0;
+  std::vector<Symbol *> Syms = F.getSymbols();
+  std::vector<lto::SymbolResolution> Resols(Syms.size());
+
+  // Provide a resolution to the LTO API for each symbol.
+  for (const lto::InputFile::Symbol &ObjSym : Obj.symbols()) {
+    Symbol *Sym = Syms[SymNum];
+    lto::SymbolResolution &R = Resols[SymNum];
+    ++SymNum;
+    SymbolBody *B = Sym->body();
+
+    // Ideally we shouldn't check for SF_Undefined but currently IRObjectFile
+    // reports two symbols for module ASM defined. Without this check, lld
+    // flags an undefined in IR with a definition in ASM as prevailing.
+    // Once IRObjectFile is fixed to report only one symbol this hack can
+    // be removed.
+    R.Prevailing =
+        !(ObjSym.getFlags() & object::BasicSymbolRef::SF_Undefined) &&
+        B->File == &F;
+
+    R.VisibleToRegularObj =
+        Sym->IsUsedInRegularObj || (R.Prevailing && Sym->includeInDynsym());
+    // A prevailing symbol is turned back into an undefined one here.
+    if (R.Prevailing)
+      undefine<ELFT>(Sym);
+  }
+  checkError(LTOObj->add(std::move(F.Obj), Resols));
+}
+
+// Merge all the bitcode files we have seen, codegen the result
+// and return the resulting ObjectFile(s).
+std::vector<InputFile *> BitcodeCompiler::compile() {
+  std::vector<InputFile *> Ret;
+  unsigned MaxTasks = LTOObj->getMaxTasks();
+  // One output buffer per LTO task; task N writes its object into Buff[N].
+  Buff.resize(MaxTasks);
+
+  checkError(LTOObj->run([&](size_t Task) {
+    return llvm::make_unique<lto::NativeObjectStream>(
+        llvm::make_unique<raw_svector_ostream>(Buff[Task]));
+  }));
+
+  for (unsigned I = 0; I != MaxTasks; ++I) {
+    // Tasks that produced no output are skipped.
+    if (Buff[I].empty())
+      continue;
+    // With save-temps, keep a copy of each native object next to the output
+    // file; the task index is part of the name only when there are several
+    // tasks.
+    if (Config->SaveTemps) {
+      if (MaxTasks == 1)
+        saveBuffer(Buff[I], Config->OutputFile + ".lto.o");
+      else
+        saveBuffer(Buff[I], Config->OutputFile + Twine(I) + ".lto.o");
+    }
+    // The object file is created over a reference to Buff[I], not a copy.
+    InputFile *Obj = createObjectFile(MemoryBufferRef(Buff[I], "lto.tmp"));
+    Ret.push_back(Obj);
+  }
+  return Ret;
+}
+
+template void BitcodeCompiler::template add<ELF32LE>(BitcodeFile &);
+template void BitcodeCompiler::template add<ELF32BE>(BitcodeFile &);
+template void BitcodeCompiler::template add<ELF64LE>(BitcodeFile &);
+template void BitcodeCompiler::template add<ELF64BE>(BitcodeFile &);
diff --git a/contrib/llvm/tools/lld/ELF/LTO.h b/contrib/llvm/tools/lld/ELF/LTO.h
new file mode 100644
index 000000000000..3cb763650e1c
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/LTO.h
@@ -0,0 +1,56 @@
+//===- LTO.h ----------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a way to combine bitcode files into one ELF
+// file by compiling them using LLVM.
+//
+// If LTO is in use, your input files are not regular ELF files
+// but instead LLVM bitcode files. In that case, the linker has to
+// convert bitcode files into the native format so that we can create
+// an ELF file that contains native code. This file provides that
+// functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_LTO_H
+#define LLD_ELF_LTO_H
+
+#include "lld/Core/LLVM.h"
+#include "llvm/ADT/SmallString.h"
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace lto {
+class LTO;
+}
+}
+
+namespace lld {
+namespace elf {
+
+class BitcodeFile;
+class InputFile;
+
+// Collects bitcode files with add() and turns them into native object files
+// with compile().
+class BitcodeCompiler {
+public:
+  BitcodeCompiler();
+  ~BitcodeCompiler();
+
+  template <class ELFT> void add(BitcodeFile &F);
+  std::vector<InputFile *> compile();
+
+private:
+  std::unique_ptr<llvm::lto::LTO> LTOObj;
+  // Per-task output buffers filled by compile(). The files compile() returns
+  // are created over these buffers.
+  std::vector<SmallString<0>> Buff;
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
new file mode 100644
index 000000000000..59ef36c87de5
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
@@ -0,0 +1,1966 @@
+//===- LinkerScript.cpp ---------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the parser/evaluator of the linker script.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LinkerScript.h"
+#include "Config.h"
+#include "Driver.h"
+#include "InputSection.h"
+#include "Memory.h"
+#include "OutputSections.h"
+#include "ScriptParser.h"
+#include "Strings.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Writer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::elf;
+
+LinkerScriptBase *elf::ScriptBase;
+ScriptConfiguration *elf::ScriptConfig;
+
+// Defines the symbol named by Cmd as a DefinedRegular. Its Value is filled in
+// later by assignSymbol() or assignSectionSymbol().
+template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) {
+  uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
+  Symbol *Sym = Symtab<ELFT>::X->addUndefined(
+      Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility,
+      /*Type*/ 0,
+      /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
+
+  replaceBody<DefinedRegular<ELFT>>(Sym, Cmd->Name, /*IsLocal=*/false,
+                                    Visibility, STT_NOTYPE, 0, 0, nullptr,
+                                    nullptr);
+  return Sym->body();
+}
+
+// Defines the symbol named by Cmd as a DefinedSynthetic. When the script has
+// a SECTIONS command the section is left null here; assignSectionSymbol()
+// sets it later.
+template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) {
+  uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
+  const OutputSectionBase *Sec =
+      ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section();
+  Symbol *Sym = Symtab<ELFT>::X->addUndefined(
+      Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility,
+      /*Type*/ 0,
+      /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
+
+  replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec);
+  return Sym->body();
+}
+
+// Returns true if Path, or any of its parent directories, is equivalent to
+// Config->Sysroot. Always false when no sysroot is set.
+static bool isUnderSysroot(StringRef Path) {
+  if (Config->Sysroot == "")
+    return false;
+  for (; !Path.empty(); Path = sys::path::parent_path(Path))
+    if (sys::fs::equivalent(Config->Sysroot, Path))
+      return true;
+  return false;
+}
+
+// Evaluates Cmd's expression and stores the result in the symbol created for
+// it, but only for scripts without a SECTIONS command.
+template <class ELFT> static void assignSymbol(SymbolAssignment *Cmd) {
+  // If there are sections, then let the value be assigned later in
+  // `assignAddresses`.
+  if (ScriptConfig->HasSections)
+    return;
+
+  uint64_t Value = Cmd->Expression(0);
+  if (Cmd->Expression.IsAbsolute()) {
+    cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Value;
+  } else {
+    // Synthetic symbols store an offset relative to their section's address.
+    const OutputSectionBase *Sec = Cmd->Expression.Section();
+    if (Sec)
+      cast<DefinedSynthetic>(Cmd->Sym)->Value = Value - Sec->Addr;
+  }
+}
+
+// Creates the symbol for a symbol assignment command. Assignments to "." are
+// not symbols and are ignored here.
+template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) {
+  if (Cmd->Name == ".")
+    return;
+
+  // If a symbol was in PROVIDE(), we need to define it only when
+  // it is a referenced undefined symbol.
+  SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name);
+  if (Cmd->Provide && (!B || B->isDefined()))
+    return;
+
+  // Otherwise, create a new symbol if one does not exist or an
+  // undefined one does exist.
+  if (Cmd->Expression.IsAbsolute())
+    Cmd->Sym = addRegular<ELFT>(Cmd);
+  else
+    Cmd->Sym = addSynthetic<ELFT>(Cmd);
+  assignSymbol<ELFT>(Cmd);
+}
+
+bool SymbolAssignment::classof(const BaseCommand *C) {
+  return C->Kind == AssignmentKind;
+}
+
+bool OutputSectionCommand::classof(const BaseCommand *C) {
+  return C->Kind == OutputSectionKind;
+}
+
+bool InputSectionDescription::classof(const BaseCommand *C) {
+  return C->Kind == InputSectionKind;
+}
+
+bool AssertCommand::classof(const BaseCommand *C) {
+  return C->Kind == AssertKind;
+}
+
+bool BytesDataCommand::classof(const BaseCommand *C) {
+  return C->Kind == BytesDataKind;
+}
+
+template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default;
+template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default;
+
+// Returns the file name component of the file S belongs to, or "" if S has
+// no file.
+template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) {
+  if (S->getFile())
+    return sys::path::filename(S->getFile()->getName());
+  return "";
+}
+
+// Returns true if S matches both the file pattern and one of the section
+// patterns of any description in Opt.KeptSections.
+template <class ELFT>
+bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
+  for (InputSectionDescription *ID : Opt.KeptSections)
+    if (ID->FilePat.match(basename(S)))
+      for (SectionPattern &P : ID->SectionPatterns)
+        if (P.SectionPat.match(S->Name))
+          return true;
+  return false;
+}
+
+static bool comparePriority(InputSectionData *A, InputSectionData *B) {
+  return getPriority(A->Name) < getPriority(B->Name);
+}
+
+static bool compareName(InputSectionData *A, InputSectionData *B) {
+  return A->Name < B->Name;
+}
+
+static bool compareAlignment(InputSectionData *A, InputSectionData *B) {
+  // ">" is not a mistake. Larger alignments are placed before smaller
+  // alignments in order to reduce the amount of padding necessary.
+  // This is compatible with GNU.
+  return A->Alignment > B->Alignment;
+}
+
+// Maps a sort policy to its comparison function. Only Alignment, Name and
+// Priority are valid here; sortSections() filters out Default and None.
+static std::function<bool(InputSectionData *, InputSectionData *)>
+getComparator(SortSectionPolicy K) {
+  switch (K) {
+  case SortSectionPolicy::Alignment:
+    return compareAlignment;
+  case SortSectionPolicy::Name:
+    return compareName;
+  case SortSectionPolicy::Priority:
+    return comparePriority;
+  default:
+    llvm_unreachable("unknown sort policy");
+  }
+}
+
+// Checks an output section constraint against its member sections.
+// NoConstraint always matches, ReadWrite matches if at least one section has
+// SHF_WRITE set, and ReadOnly matches if none has.
+template <class ELFT>
+static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections,
+                             ConstraintKind Kind) {
+  if (Kind == ConstraintKind::NoConstraint)
+    return true;
+  bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) {
+    auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2);
+    return Sec->Flags & SHF_WRITE;
+  });
+  return (IsRW && Kind == ConstraintKind::ReadWrite) ||
+         (!IsRW && Kind == ConstraintKind::ReadOnly);
+}
+
+// Stable-sorts [Begin, End) according to K. The Default and None policies
+// leave the range untouched.
+static void sortSections(InputSectionData **Begin, InputSectionData **End,
+                         SortSectionPolicy K) {
+  if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None)
+    std::stable_sort(Begin, End, getComparator(K));
+}
+
+// Compute and remember which sections the InputSectionDescription matches.
+template <class ELFT>
+void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) {
+  // Collects all sections that satisfy constraints of I
+  // and attach them to I.
+  for (SectionPattern &Pat : I->SectionPatterns) {
+    size_t SizeBefore = I->Sections.size();
+
+    for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) {
+      // A section is claimed by the first pattern that matches it.
+      if (!S->Live || S->Assigned)
+        continue;
+
+      StringRef Filename = basename(S);
+      if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename))
+        continue;
+      if (!Pat.SectionPat.match(S->Name))
+        continue;
+      I->Sections.push_back(S);
+      S->Assigned = true;
+    }
+
+    // Sort sections as instructed by SORT-family commands and --sort-section
+    // option. Because SORT-family commands can be nested at most two levels
+    // deep (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the
+    // command line option is respected even if a SORT command is given, the
+    // exact behavior we have here is a bit complicated. Here are the rules.
+    //
+    // 1. If two SORT commands are given, --sort-section is ignored.
+    // 2. If one SORT command is given, and if it is not SORT_NONE,
+    //    --sort-section is handled as an inner SORT command.
+    // 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
+    // 4. If no SORT command is given, sort according to --sort-section.
+    //
+    // Only the sections added for this pattern are sorted. The inner sort
+    // runs first; the outer sort is stable and therefore preserves the inner
+    // order among equal elements.
+    InputSectionData **Begin = I->Sections.data() + SizeBefore;
+    InputSectionData **End = I->Sections.data() + I->Sections.size();
+    if (Pat.SortOuter != SortSectionPolicy::None) {
+      if (Pat.SortInner == SortSectionPolicy::Default)
+        sortSections(Begin, End, Config->SortSection);
+      else
+        sortSections(Begin, End, Pat.SortInner);
+      sortSections(Begin, End, Pat.SortOuter);
+    }
+  }
+}
+
+// Marks every section in V as not live and reports it as discarded.
+template <class ELFT>
+void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) {
+  for (InputSectionBase<ELFT> *S : V) {
+    S->Live = false;
+    reportDiscarded(S);
+  }
+}
+
+// Resolves every input section description inside OutCmd and returns all the
+// sections they matched, in command order.
+template <class ELFT>
+std::vector<InputSectionBase<ELFT> *>
+LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) {
+  std::vector<InputSectionBase<ELFT> *> Ret;
+
+  for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
+    auto *Cmd = dyn_cast<InputSectionDescription>(Base.get());
+    if (!Cmd)
+      continue;
+    computeInputSections(Cmd);
+    for (InputSectionData *S : Cmd->Sections)
+      Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S));
+  }
+
+  return Ret;
+}
+
+// Adds Sec to the output section that Factory provides for Name. An output
+// section reported as new by the factory is also appended to OutputSections.
+template <class ELFT>
+void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory,
+                                    InputSectionBase<ELFT> *Sec,
+                                    StringRef Name) {
+  OutputSectionBase *OutSec;
+  bool IsNew;
+  std::tie(OutSec, IsNew) = Factory.create(Sec, Name);
+  if (IsNew)
+    OutputSections->push_back(OutSec);
+  OutSec->addSection(Sec);
+}
+
+// Walks the top-level script commands: defines symbols, evaluates ASSERTs
+// when there is no SECTIONS command, and assigns input sections to output
+// sections.
+template <class ELFT>
+void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) {
+  // Index-based loop because commands may be erased while iterating.
+  for (unsigned I = 0; I < Opt.Commands.size(); ++I) {
+    auto Iter = Opt.Commands.begin() + I;
+    const std::unique_ptr<BaseCommand> &Base1 = *Iter;
+
+    // Handle symbol assignments outside of any output section.
+    if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) {
+      addSymbol<ELFT>(Cmd);
+      continue;
+    }
+
+    if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) {
+      // If we don't have SECTIONS then output sections have already been
+      // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses
+      // will not be called, so ASSERT should be evaluated now.
+      if (!Opt.HasSections)
+        Cmd->Expression(0);
+      continue;
+    }
+
+    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) {
+      std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd);
+
+      // The output section name `/DISCARD/' is special.
+      // Any input section assigned to it is discarded.
+      if (Cmd->Name == "/DISCARD/") {
+        discard(V);
+        continue;
+      }
+
+      // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive
+      // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input
+      // sections satisfy a given constraint. If not, a directive is handled
+      // as if it wasn't present from the beginning.
+      //
+      // Because we'll iterate over Commands many more times, the easiest
+      // way to "make it as if it wasn't present" is to just remove it.
+      if (!matchConstraints<ELFT>(V, Cmd->Constraint)) {
+        // Release the sections so that later commands can claim them.
+        for (InputSectionBase<ELFT> *S : V)
+          S->Assigned = false;
+        Opt.Commands.erase(Iter);
+        // Compensate for the erased element; ++I revisits this index.
+        --I;
+        continue;
+      }
+
+      // A directive may contain symbol definitions like this:
+      // ".foo : { ...; bar = .; }". Handle them.
+      for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
+        if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get()))
+          addSymbol<ELFT>(OutCmd);
+
+      // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
+      // is given, input sections are aligned to that value, whether the
+      // given value is larger or smaller than the original section alignment.
+      if (Cmd->SubalignExpr) {
+        uint32_t Subalign = Cmd->SubalignExpr(0);
+        for (InputSectionBase<ELFT> *S : V)
+          S->Alignment = Subalign;
+      }
+
+      // Add input sections to an output section.
+      for (InputSectionBase<ELFT> *S : V)
+        addSection(Factory, S, Cmd->Name);
+    }
+  }
+}
+
+// Add sections that didn't match any sections command.
+template <class ELFT>
+void LinkerScript<ELFT>::addOrphanSections(
+    OutputSectionFactory<ELFT> &Factory) {
+  for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections)
+    if (S->Live && !S->OutSec)
+      addSection(Factory, S, getOutputSectionName(S->Name));
+}
+
+// Sets value of a section-defined symbol. Two kinds of
+// symbols are processed: synthetic symbols, whose value
+// is an offset from beginning of section and regular
+// symbols whose value is absolute.
+template <class ELFT>
+static void assignSectionSymbol(SymbolAssignment *Cmd,
+                                typename ELFT::uint Value) {
+  // Commands for which addSymbol() created no symbol have nothing to assign.
+  if (!Cmd->Sym)
+    return;
+
+  if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) {
+    Body->Section = Cmd->Expression.Section();
+    Body->Value = Cmd->Expression(Value) - Body->Section->Addr;
+    return;
+  }
+  auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym);
+  Body->Value = Cmd->Expression(Value);
+}
+
+// Returns true if Sec is a TLS section of type SHT_NOBITS.
+template <class ELFT> static bool isTbss(OutputSectionBase *Sec) {
+  return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS;
+}
+
+// Assigns S its offset within the current output section and advances the
+// position. Each input section is placed at most once.
+template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) {
+  if (!AlreadyOutputIS.insert(S).second)
+    return;
+  bool IsTbss = isTbss<ELFT>(CurOutSec);
+
+  // TLS NOBITS sections are laid out relative to Dot + ThreadBssOffset and
+  // do not advance Dot itself.
+  uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot;
+  Pos = alignTo(Pos, S->Alignment);
+  S->OutSecOff = Pos - CurOutSec->Addr;
+  Pos += S->getSize();
+
+  // Update output section size after adding each section. This is so that
+  // SIZEOF works correctly in the case below:
+  // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
+  CurOutSec->Size = Pos - CurOutSec->Addr;
+
+  if (IsTbss)
+    ThreadBssOffset = Pos - Dot;
+  else
+    Dot = Pos;
+}
+
+// Lays out the input sections of the current output section that have not
+// been placed yet. Runs at most once per output section.
+template <class ELFT> void LinkerScript<ELFT>::flush() {
+  if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second)
+    return;
+  if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) {
+    for (InputSection<ELFT> *I : OutSec->Sections)
+      output(I);
+  } else {
+    // Other kinds of output sections are accounted for by their total size.
+    Dot += CurOutSec->Size;
+  }
+}
+
+// Makes Sec the current output section: flushes the previous one, aligns Dot
+// and sets the address of Sec. Does nothing if Sec is already current or has
+// already been flushed.
+template <class ELFT>
+void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) {
+  if (CurOutSec == Sec)
+    return;
+  if (AlreadyOutputOS.count(Sec))
+    return;
+
+  flush();
+  CurOutSec = Sec;
+
+  Dot = alignTo(Dot, CurOutSec->Addralign);
+  CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot;
+
+  // If neither AT nor AT> is specified for an allocatable section, the linker
+  // will set the LMA such that the difference between VMA and LMA for the
+  // section is the same as the preceding output section in the same region
+  // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
+  CurOutSec->setLMAOffset(LMAOffset);
+}
+
+// Executes one command found inside an output section description.
+template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
+  // This handles the assignments to symbol or to a location counter (.)
+  if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
+    if (AssignCmd->Name == ".") {
+      // Update to location counter means update to section size.
+      uintX_t Val = AssignCmd->Expression(Dot);
+      if (Val < Dot)
+        error("unable to move location counter backward for: " +
+              CurOutSec->Name);
+      Dot = Val;
+      CurOutSec->Size = Dot - CurOutSec->Addr;
+      return;
+    }
+    assignSectionSymbol<ELFT>(AssignCmd, Dot);
+    return;
+  }
+
+  // Handle BYTE(), SHORT(), LONG(), or QUAD().
+  if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) {
+    // Only the position is recorded here; the data is not written yet.
+    DataCmd->Offset = Dot - CurOutSec->Addr;
+    Dot += DataCmd->Size;
+    CurOutSec->Size = Dot - CurOutSec->Addr;
+    return;
+  }
+
+  if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) {
+    AssertCmd->Expression(Dot);
+    return;
+  }
+
+  // It handles single input section description command,
+  // calculates and assigns the offsets for each section and also
+  // updates the output section size.
+  auto &ICmd = cast<InputSectionDescription>(Base);
+  for (InputSectionData *ID : ICmd.Sections) {
+    // We tentatively added all synthetic sections at the beginning and removed
+    // empty ones afterwards (because there is no way to know whether they were
+    // going to be empty or not other than actually running linker scripts.)
+    // We need to ignore remains of empty sections.
+    if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID))
+      if (Sec->empty())
+        continue;
+
+    auto *IB = static_cast<InputSectionBase<ELFT> *>(ID);
+    switchTo(IB->OutSec);
+    if (auto *I = dyn_cast<InputSection<ELFT>>(IB))
+      output(I);
+    else
+      flush();
+  }
+}
+
+// Returns all output sections in Sections whose name is Name, preserving
+// their order.
+template <class ELFT>
+static std::vector<OutputSectionBase *>
+findSections(StringRef Name, const std::vector<OutputSectionBase *> &Sections) {
+  std::vector<OutputSectionBase *> Ret;
+  for (OutputSectionBase *Sec : Sections)
+    if (Sec->getName() == Name)
+      Ret.push_back(Sec);
+  return Ret;
+}
+
+// This function assigns offsets to input sections and an output section
+// for a single sections command (e.g. ".text { *(.text); }").
+template <class ELFT>
+void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) {
+  if (Cmd->LMAExpr)
+    LMAOffset = Cmd->LMAExpr(Dot) - Dot;
+  std::vector<OutputSectionBase *> Sections =
+      findSections<ELFT>(Cmd->Name, *OutputSections);
+  if (Sections.empty())
+    return;
+  switchTo(Sections[0]);
+
+  // Find the last section output location. We will output orphan sections
+  // there so that end symbols point to the correct location.
+  // E points just past the last command that is not a symbol assignment, so
+  // [E, end) is the run of trailing symbol assignments.
+  auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(),
+                        [](const std::unique_ptr<BaseCommand> &Cmd) {
+                          return !isa<SymbolAssignment>(*Cmd);
+                        })
+               .base();
+  for (auto I = Cmd->Commands.begin(); I != E; ++I)
+    process(**I);
+  // Lay out whatever is left in every output section of this name before the
+  // trailing assignments are evaluated.
+  for (OutputSectionBase *Base : Sections)
+    switchTo(Base);
+  flush();
+  std::for_each(E, Cmd->Commands.end(),
+                [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); });
+}
+
+template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() {
+  // It is common practice to use very generic linker scripts. So for any
+  // given run some of the output sections in the script will be empty.
+  // We could create corresponding empty output sections, but that would
+  // clutter the output.
+  // We instead remove trivially empty sections. The bfd linker seems even
+  // more aggressive at removing them.
+  auto Pos = std::remove_if(
+      Opt.Commands.begin(), Opt.Commands.end(),
+      [&](const std::unique_ptr<BaseCommand> &Base) {
+        if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
+          return findSections<ELFT>(Cmd->Name, *OutputSections).empty();
+        return false;
+      });
+  Opt.Commands.erase(Pos, Opt.Commands.end());
+}
+
+// Returns true if Cmd consists solely of input section descriptions (which
+// is trivially the case when it has no commands at all).
+static bool isAllSectionDescription(const OutputSectionCommand &Cmd) {
+  for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands)
+    if (!isa<InputSectionDescription>(*I))
+      return false;
+  return true;
+}
+
+template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() {
+  // If the output section contains only symbol assignments, create a
+  // corresponding output section. The bfd linker seems to only create them if
+  // '.' is assigned to, but creating these sections should not have any bad
+  // consequences and gives us a section to put the symbol in.
+  //
+  // A section created here takes the flags and type of the closest preceding
+  // command that has an output section; SHF_ALLOC and SHT_NOBITS are used if
+  // there is none.
+  uintX_t Flags = SHF_ALLOC;
+  uint32_t Type = SHT_NOBITS;
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
+    if (!Cmd)
+      continue;
+    std::vector<OutputSectionBase *> Secs =
+        findSections<ELFT>(Cmd->Name, *OutputSections);
+    if (!Secs.empty()) {
+      Flags = Secs[0]->Flags;
+      Type = Secs[0]->Type;
+      continue;
+    }
+
+    if (isAllSectionDescription(*Cmd))
+      continue;
+
+    auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags);
+    OutputSections->push_back(OutSec);
+  }
+}
+
+template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() {
+  placeOrphanSections();
+
+  // If output section command doesn't specify any segments,
+  // and we haven't previously assigned any section to segment,
+  // then we simply assign section to the very first load segment.
+  // Below is an example of such linker script:
+  //   PHDRS { seg PT_LOAD; }
+  //   SECTIONS { .aaa : { *(.aaa) } }
+  std::vector<StringRef> DefPhdrs;
+  auto FirstPtLoad =
+      std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(),
+                   [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; });
+  if (FirstPtLoad != Opt.PhdrsCommands.end())
+    DefPhdrs.push_back(FirstPtLoad->Name);
+
+  // Walk the commands and propagate the program headers to commands that don't
+  // explicitly specify them.
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
+    if (!Cmd)
+      continue;
+    if (Cmd->Phdrs.empty())
+      Cmd->Phdrs = DefPhdrs;
+    else
+      DefPhdrs = Cmd->Phdrs;
+  }
+
+  removeEmptyCommands();
+}
+
+// When placing orphan sections, we want to place them after symbol assignments
+// so that an orphan after
+//   begin_foo = .;
+//   foo : { *(foo) }
+//   end_foo = .;
+// doesn't break the intended meaning of the begin/end symbols.
+// We don't want to go over sections since Writer<ELFT>::sortSections is the
+// one in charge of deciding the order of the sections.
+// We don't want to go over alignments, since doing so in
+//   rx_sec : { *(rx_sec) }
+//   . = ALIGN(0x1000);
+//   /* The RW PT_LOAD starts here*/
+//   rw_sec : { *(rw_sec) }
+// would mean that the RW PT_LOAD would become unaligned.
+static bool shouldSkip(const BaseCommand &Cmd) {
+  if (isa<OutputSectionCommand>(Cmd))
+    return false;
+  const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd);
+  if (!Assign)
+    return true;
+  return Assign->Name != ".";
+}
+
+// Orphan sections are sections present in the input files which are not
+// explicitly placed into the output file by the linker script. This just
+// places them in the order already decided in OutputSections.
+template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() {
+  // The OutputSections are already in the correct order.
+  // This loop creates or moves commands as needed so that they are in the
+  // correct order.
+  int CmdIndex = 0;
+
+  // As a horrible special case, skip the first . assignment if it is before any
+  // section. We do this because it is common to set a load address by starting
+  // the script with ". = 0xabcd" and the expectation is that every section is
+  // after that.
+  auto FirstSectionOrDotAssignment =
+      std::find_if(Opt.Commands.begin(), Opt.Commands.end(),
+                   [](const std::unique_ptr<BaseCommand> &Cmd) {
+                     if (isa<OutputSectionCommand>(*Cmd))
+                       return true;
+                     const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get());
+                     if (!Assign)
+                       return false;
+                     return Assign->Name == ".";
+                   });
+  if (FirstSectionOrDotAssignment != Opt.Commands.end()) {
+    CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin();
+    if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment))
+      ++CmdIndex;
+  }
+
+  for (OutputSectionBase *Sec : *OutputSections) {
+    StringRef Name = Sec->getName();
+
+    // Find the last spot where we can insert a command and still get the
+    // correct result.
+    // The position is tracked as an index (CmdIndex) because inserting into
+    // Opt.Commands invalidates iterators; they are recomputed on every pass.
+    auto CmdIter = Opt.Commands.begin() + CmdIndex;
+    auto E = Opt.Commands.end();
+    while (CmdIter != E && shouldSkip(**CmdIter)) {
+      ++CmdIter;
+      ++CmdIndex;
+    }
+
+    auto Pos =
+        std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) {
+          auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
+          return Cmd && Cmd->Name == Name;
+        });
+    if (Pos == E) {
+      // No command for this section from here on: it is an orphan, so give
+      // it a command of its own at the insertion point.
+      Opt.Commands.insert(CmdIter,
+                          llvm::make_unique<OutputSectionCommand>(Name));
+      ++CmdIndex;
+      continue;
+    }
+
+    // Continue from where we found it.
+    CmdIndex = (Pos - Opt.Commands.begin()) + 1;
+  }
+}
+
+// Runs the script commands to assign addresses to all output sections, then
+// places the ELF header and the program headers.
+template <class ELFT>
+void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) {
+  // Assign addresses as instructed by linker script SECTIONS sub-commands.
+  Dot = 0;
+
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) {
+      if (Cmd->Name == ".") {
+        Dot = Cmd->Expression(Dot);
+      } else if (Cmd->Sym) {
+        assignSectionSymbol<ELFT>(Cmd, Dot);
+      }
+      continue;
+    }
+
+    if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) {
+      Cmd->Expression(Dot);
+      continue;
+    }
+
+    auto *Cmd = cast<OutputSectionCommand>(Base.get());
+    if (Cmd->AddrExpr)
+      Dot = Cmd->AddrExpr(Dot);
+    assignOffsets(Cmd);
+  }
+
+  // Find the lowest address of any allocated section. Sections that are not
+  // allocated get address 0.
+  uintX_t MinVA = std::numeric_limits<uintX_t>::max();
+  for (OutputSectionBase *Sec : *OutputSections) {
+    if (Sec->Flags & SHF_ALLOC)
+      MinVA = std::min<uint64_t>(MinVA, Sec->Addr);
+    else
+      Sec->Addr = 0;
+  }
+
+  uintX_t HeaderSize = getHeaderSize();
+  // If the linker script doesn't have PHDRS, add ElfHeader and ProgramHeaders
+  // now that we know we have space.
+  if (HeaderSize <= MinVA && !hasPhdrsCommands())
+    allocateHeaders<ELFT>(Phdrs, *OutputSections);
+
+  // ELF and Program headers need to be right before the first section in
+  // memory. Set their addresses accordingly.
+  MinVA = alignDown(MinVA - HeaderSize, Config->MaxPageSize);
+  Out<ELFT>::ElfHeader->Addr = MinVA;
+  Out<ELFT>::ProgramHeaders->Addr = Out<ELFT>::ElfHeader->Size + MinVA;
+}
+
+// Creates program headers as instructed by PHDRS linker script command.
+template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() {
+  std::vector<PhdrEntry> Ret;
+
+  // Process PHDRS and FILEHDR keywords because they are not
+  // real output sections and cannot be added in the following loop.
+  for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) {
+    // A Flags value of UINT_MAX is treated as "not given in the script":
+    // start from PF_R and let the member sections add their flags below.
+    Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags);
+    PhdrEntry &Phdr = Ret.back();
+
+    if (Cmd.HasFilehdr)
+      Phdr.add(Out<ELFT>::ElfHeader);
+    if (Cmd.HasPhdrs)
+      Phdr.add(Out<ELFT>::ProgramHeaders);
+
+    if (Cmd.LMAExpr) {
+      Phdr.p_paddr = Cmd.LMAExpr(0);
+      Phdr.HasLMA = true;
+    }
+  }
+
+  // Add output sections to program headers.
+  for (OutputSectionBase *Sec : *OutputSections) {
+    // Stops at the first section that is not allocated.
+    // NOTE(review): this presumably relies on allocated sections being
+    // sorted first -- confirm against the writer's section ordering.
+    if (!(Sec->Flags & SHF_ALLOC))
+      break;
+
+    // Assign headers specified by linker script
+    for (size_t Id : getPhdrIndices(Sec->getName())) {
+      Ret[Id].add(Sec);
+      if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
+        Ret[Id].p_flags |= Sec->getPhdrFlags();
+    }
+  }
+  return Ret;
+}
+
+template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() {
+  // Ignore .interp section in case we have PHDRS specification
+  // and PT_INTERP isn't listed.
+  return !Opt.PhdrsCommands.empty() &&
+         llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) {
+           return Cmd.Type == PT_INTERP;
+         }) == Opt.PhdrsCommands.end();
+}
+
+// Returns the Filler of the first output section command called Name, or 0
+// if the script has no such command.
+template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) {
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
+    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
+      if (Cmd->Name == Name)
+        return Cmd->Filler;
+  return 0;
+}
+
+// Writes the low Size bytes of Data to Buf in the target's byte order.
+// Size must be 1, 2, 4 or 8.
+template <class ELFT>
+static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) {
+  const endianness E = ELFT::TargetEndianness;
+
+  switch (Size) {
+  case 1:
+    *Buf = (uint8_t)Data;
+    break;
+  case 2:
+    write16<E>(Buf, Data);
+    break;
+  case 4:
+    write32<E>(Buf, Data);
+    break;
+  case 8:
+    write64<E>(Buf, Data);
+    break;
+  default:
+    llvm_unreachable("unsupported Size argument");
+  }
+}
+
+// Writes the data of every BytesDataCommand of output section Name into Buf,
+// each at the offset recorded in the command. Does nothing if the section
+// does not appear in the script.
+template <class ELFT>
+void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) {
+  int I = getSectionIndex(Name);
+  if (I == INT_MAX)
+    return;
+
+  // The command at a valid section index is expected to be an output section
+  // command. cast<> asserts that invariant, whereas an unchecked dyn_cast<>
+  // would silently yield a null pointer that is dereferenced right below.
+  auto *Cmd = cast<OutputSectionCommand>(Opt.Commands[I].get());
+  for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands)
+    if (auto *Data = dyn_cast<BytesDataCommand>(Base.get()))
+      writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size);
+}
+
+// Returns true if the script has an output section command called Name that
+// specifies an LMA expression.
+template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) {
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
+    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
+      if (Cmd->LMAExpr && Cmd->Name == Name)
+        return true;
+  return false;
+}
+
+// Returns the index of the given section name in linker script
+// SECTIONS commands. Sections are laid out in the same order as they
+// were in the script. If a given name did not appear in the script,
+// it returns INT_MAX, so that it will be laid out at end of file.
+template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) {
+  // Position of the command currently being inspected within Opt.Commands.
+  int Index = 0;
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    auto *Sec = dyn_cast<OutputSectionCommand>(Base.get());
+    if (Sec && Sec->Name == Name)
+      return Index;
+    ++Index;
+  }
+  return INT_MAX;
+}
+
+// Returns true if the script contained at least one PHDRS entry.
+template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() {
+  const bool NoPhdrs = Opt.PhdrsCommands.empty();
+  return !NoPhdrs;
+}
+
+// Looks up the output section called Name. If there is none, reports an
+// "undefined section" error at Loc and hands back a shared placeholder
+// section so that callers can keep going without a null check.
+template <class ELFT>
+const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc,
+                                                              StringRef Name) {
+  static OutputSectionBase FakeSec("", 0, 0);
+
+  auto Found = llvm::find_if(*OutputSections, [&](OutputSectionBase *Sec) {
+    return Sec->getName() == Name;
+  });
+  if (Found != OutputSections->end())
+    return *Found;
+
+  error(Loc + ": undefined section " + Name);
+  return &FakeSec;
+}
+
+// This function is essentially the same as getOutputSection(Name)->Size,
+// but it won't print out an error message if a given section is not found.
+//
+// Linker script does not create an output section if its content is empty.
+// We want to allow SIZEOF(.foo) where .foo is a section which happened to
+// be empty. That is why this function is different from getOutputSection().
+template <class ELFT>
+uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) {
+  for (OutputSectionBase *Sec : *OutputSections) {
+    if (Sec->getName() != Name)
+      continue;
+    return Sec->Size;
+  }
+  // A missing section is deliberately treated as an empty one.
+  return 0;
+}
+
+// Forwards to the ELFT-specific free function of the same name.
+template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() {
+  return elf::getHeaderSize<ELFT>();
+}
+
+// Returns the virtual address of symbol S. If the symbol table has no such
+// symbol, an error is reported at Loc and 0 is returned.
+template <class ELFT>
+uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) {
+  SymbolBody *Body = Symtab<ELFT>::X->find(S);
+  if (!Body) {
+    error(Loc + ": symbol not found: " + S);
+    return 0;
+  }
+  return Body->getVA<ELFT>();
+}
+
+// Returns true if the symbol table has an entry for S.
+template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) {
+  SymbolBody *Body = Symtab<ELFT>::X->find(S);
+  return Body != nullptr;
+}
+
+// A symbol counts as absolute when it is a DefinedRegular that is not
+// attached to any section.
+template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) {
+  auto *Regular =
+      dyn_cast_or_null<DefinedRegular<ELFT>>(Symtab<ELFT>::X->find(S));
+  if (!Regular)
+    return false;
+  return !Regular->Section;
+}
+
+// Gets section symbol belongs to. Symbol "." doesn't belong to any
+// specific section but isn't absolute at the same time, so we try
+// to find suitable section for it as well.
+template <class ELFT>
+const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) {
+  SymbolBody *Body = Symtab<ELFT>::X->find(S);
+  if (Body) {
+    if (auto *Regular = dyn_cast<DefinedRegular<ELFT>>(Body))
+      return Regular->Section ? Regular->Section->OutSec : nullptr;
+    if (auto *Synthetic = dyn_cast<DefinedSynthetic>(Body))
+      return Synthetic->Section;
+    return nullptr;
+  }
+
+  // Name not in the symbol table: fall back to the section currently being
+  // processed, or to the first output section if there is no current one.
+  if (OutputSections->empty())
+    return nullptr;
+  if (CurOutSec)
+    return CurOutSec;
+  return (*OutputSections)[0];
+}
+
+// Returns indices of program headers containing specific section, identified
+// by Name. Each index is a zero based number of the program header listed
+// within PHDRS {} script block.
+template <class ELFT>
+std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) {
+  for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
+    auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get());
+    if (!Cmd || Cmd->Name != SectionName)
+      continue;
+
+    // Only the first command with a matching name is consulted.
+    std::vector<size_t> Ret;
+    for (StringRef PhdrName : Cmd->Phdrs)
+      Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName));
+    return Ret;
+  }
+  return {};
+}
+
+// Returns the zero-based position of the PHDRS entry called PhdrName.
+// If there is no such entry, reports an error at Loc and returns 0.
+template <class ELFT>
+size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) {
+  size_t I = 0;
+  for (PhdrsCommand &Cmd : Opt.PhdrsCommands) {
+    if (Cmd.Name == PhdrName)
+      return I;
+    ++I;
+  }
+  // PhdrName names a program header (an entry of PHDRS), not a section
+  // header, so say so in the diagnostic.
+  error(Loc + ": program header '" + PhdrName + "' is not listed in PHDRS");
+  return 0;
+}
+
+// Recursive-descent parser for linker scripts, version scripts and dynamic
+// lists. Results are stored into Config and ScriptConfig.
+class elf::ScriptParser final : public ScriptParserBase {
+  typedef void (ScriptParser::*Handler)();
+
+public:
+  // explicit: a memory buffer should never silently convert into a parser.
+  explicit ScriptParser(MemoryBufferRef MB)
+      : ScriptParserBase(MB),
+        IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}
+
+  void readLinkerScript();
+  void readVersionScript();
+  void readDynamicList();
+
+private:
+  void addFile(StringRef Path);
+
+  void readAsNeeded();
+  void readEntry();
+  void readExtern();
+  void readGroup();
+  void readInclude();
+  void readOutput();
+  void readOutputArch();
+  void readOutputFormat();
+  void readPhdrs();
+  void readSearchDir();
+  void readSections();
+  void readVersion();
+  void readVersionScriptCommand();
+
+  SymbolAssignment *readAssignment(StringRef Name);
+  BytesDataCommand *readBytesDataCommand(StringRef Tok);
+  uint32_t readFill();
+  OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
+  uint32_t readOutputSectionFiller(StringRef Tok);
+  std::vector<StringRef> readOutputSectionPhdrs();
+  InputSectionDescription *readInputSectionDescription(StringRef Tok);
+  StringMatcher readFilePatterns();
+  std::vector<SectionPattern> readInputSectionsList();
+  InputSectionDescription *readInputSectionRules(StringRef FilePattern);
+  unsigned readPhdrType();
+  SortSectionPolicy readSortKind();
+  SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
+  SymbolAssignment *readProvideOrAssignment(StringRef Tok);
+  void readSort();
+  Expr readAssert();
+
+  Expr readExpr();
+  Expr readExpr1(Expr Lhs, int MinPrec);
+  StringRef readParenLiteral();
+  Expr readPrimary();
+  Expr readTernary(Expr Cond);
+  Expr readParenExpr();
+
+  // For parsing version script.
+  std::vector<SymbolVersion> readVersionExtern();
+  void readAnonymousDeclaration();
+  void readVersionDeclaration(StringRef VerStr);
+  std::vector<SymbolVersion> readSymbols();
+
+  ScriptConfiguration &Opt = *ScriptConfig;
+  bool IsUnderSysroot;
+};
+
+// A dynamic list is a single anonymous "{ ... };" symbol list followed by
+// end of input.
+void ScriptParser::readDynamicList() {
+  expect("{");
+  readAnonymousDeclaration();
+  if (!atEOF())
+    setError("EOF expected, but got " + next());
+}
+
+// Reads a whole version script; anything left after it is an error.
+void ScriptParser::readVersionScript() {
+  readVersionScriptCommand();
+  if (!atEOF())
+    setError("EOF expected, but got " + next());
+}
+
+// Reads either one anonymous version definition or a sequence of named
+// ones. Mixing the two forms is rejected.
+void ScriptParser::readVersionScriptCommand() {
+  if (consume("{")) {
+    readAnonymousDeclaration();
+    return;
+  }
+
+  while (!atEOF() && !Error && peek() != "}") {
+    StringRef VerStr = next();
+    if (VerStr == "{") {
+      setError("anonymous version definition is used in "
+               "combination with other version definitions");
+      return;
+    }
+    expect("{");
+    readVersionDeclaration(VerStr);
+  }
+}
+
+// Reads "VERSION { <version script> }" embedded in a linker script.
+void ScriptParser::readVersion() {
+  expect("{");
+  readVersionScriptCommand();
+  expect("}");
+}
+
+// Top-level loop: dispatches on each directive keyword until end of input.
+void ScriptParser::readLinkerScript() {
+  while (!atEOF()) {
+    StringRef Tok = next();
+    if (Tok == ";")
+      continue;
+
+    if (Tok == "ASSERT") {
+      Opt.Commands.emplace_back(new AssertCommand(readAssert()));
+    } else if (Tok == "ENTRY") {
+      readEntry();
+    } else if (Tok == "EXTERN") {
+      readExtern();
+    } else if (Tok == "GROUP" || Tok == "INPUT") {
+      readGroup();
+    } else if (Tok == "INCLUDE") {
+      readInclude();
+    } else if (Tok == "OUTPUT") {
+      readOutput();
+    } else if (Tok == "OUTPUT_ARCH") {
+      readOutputArch();
+    } else if (Tok == "OUTPUT_FORMAT") {
+      readOutputFormat();
+    } else if (Tok == "PHDRS") {
+      readPhdrs();
+    } else if (Tok == "SEARCH_DIR") {
+      readSearchDir();
+    } else if (Tok == "SECTIONS") {
+      readSections();
+    } else if (Tok == "VERSION") {
+      readVersion();
+    } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) {
+      Opt.Commands.emplace_back(Cmd);
+    } else {
+      setError("unknown directive: " + Tok);
+    }
+  }
+}
+
+// Resolves a file name from INPUT/GROUP/AS_NEEDED and hands it to the
+// driver. Tried in order: sysroot-prefixed absolute path (only when the
+// script itself is under the sysroot), absolute path, "=" sysroot prefix,
+// "-l" library, existing relative path, then the library search paths.
+void ScriptParser::addFile(StringRef S) {
+  if (IsUnderSysroot && S.startswith("/")) {
+    SmallString<128> PathData;
+    StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
+    if (sys::fs::exists(Path)) {
+      // PathData dies with this scope, so keep a saved copy of the string.
+      Driver->addFile(Saver.save(Path));
+      return;
+    }
+  }
+
+  if (sys::path::is_absolute(S)) {
+    Driver->addFile(S);
+  } else if (S.startswith("=")) {
+    if (Config->Sysroot.empty())
+      Driver->addFile(S.substr(1));
+    else
+      Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
+  } else if (S.startswith("-l")) {
+    Driver->addLibrary(S.substr(2));
+  } else if (sys::fs::exists(S)) {
+    Driver->addFile(S);
+  } else {
+    if (Optional<std::string> Path = findFromSearchPaths(S))
+      Driver->addFile(Saver.save(*Path));
+    else
+      setError("unable to find " + S);
+  }
+}
+
+// Reads "AS_NEEDED(file...)". Config->AsNeeded is forced on while the listed
+// files are added and restored afterwards.
+void ScriptParser::readAsNeeded() {
+  expect("(");
+  bool Orig = Config->AsNeeded;
+  Config->AsNeeded = true;
+  while (!Error && !consume(")"))
+    addFile(unquote(next()));
+  Config->AsNeeded = Orig;
+}
+
+void ScriptParser::readEntry() {
+  // -e <symbol> takes precedence over ENTRY(<symbol>).
+  expect("(");
+  StringRef Tok = next();
+  if (Config->Entry.empty())
+    Config->Entry = Tok;
+  expect(")");
+}
+
+// Reads "EXTERN(sym...)" and records each symbol in Config->Undefined.
+void ScriptParser::readExtern() {
+  expect("(");
+  while (!Error && !consume(")"))
+    Config->Undefined.push_back(next());
+}
+
+// Reads the file list of GROUP(...) or INPUT(...); both are handled alike.
+void ScriptParser::readGroup() {
+  expect("(");
+  while (!Error && !consume(")")) {
+    StringRef Tok = next();
+    if (Tok == "AS_NEEDED")
+      readAsNeeded();
+    else
+      addFile(unquote(Tok));
+  }
+}
+
+// Reads "INCLUDE file" and splices the tokens of that file into the stream.
+void ScriptParser::readInclude() {
+  StringRef Tok = unquote(next());
+
+  // https://sourceware.org/binutils/docs/ld/File-Commands.html:
+  // The file will be searched for in the current directory, and in any
+  // directory specified with the -L option.
+  if (sys::fs::exists(Tok)) {
+    if (Optional<MemoryBufferRef> MB = readFile(Tok))
+      tokenize(*MB);
+    return;
+  }
+  if (Optional<std::string> Path = findFromSearchPaths(Tok)) {
+    if (Optional<MemoryBufferRef> MB = readFile(*Path))
+      tokenize(*MB);
+    return;
+  }
+  setError("cannot open " + Tok);
+}
+
+void ScriptParser::readOutput() {
+  // -o <file> takes precedence over OUTPUT(<file>).
+  expect("(");
+  StringRef Tok = next();
+  if (Config->OutputFile.empty())
+    Config->OutputFile = unquote(Tok);
+  expect(")");
+}
+
+void ScriptParser::readOutputArch() {
+  // Error checking only for now.
+  expect("(");
+  skip();
+  expect(")");
+}
+
+// Accepts OUTPUT_FORMAT(name) and OUTPUT_FORMAT(default, big, little).
+void ScriptParser::readOutputFormat() {
+  // Error checking only for now.
+  expect("(");
+  skip();
+  StringRef Tok = next();
+  if (Tok == ")")
+    return;
+  if (Tok != ",") {
+    setError("unexpected token: " + Tok);
+    return;
+  }
+  skip();
+  expect(",");
+  skip();
+  expect(")");
+}
+
+// Reads "PHDRS { name type [FILEHDR] [PHDRS] [AT(expr)] [FLAGS(expr)]; ... }"
+// and appends one PhdrsCommand per entry.
+void ScriptParser::readPhdrs() {
+  expect("{");
+  while (!Error && !consume("}")) {
+    StringRef Tok = next();
+    // Flags start out as UINT_MAX, which means "FLAGS was not given".
+    Opt.PhdrsCommands.push_back(
+        {Tok, PT_NULL, false, false, UINT_MAX, nullptr});
+    PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back();
+
+    PhdrCmd.Type = readPhdrType();
+    do {
+      Tok = next();
+      if (Tok == ";")
+        break;
+      if (Tok == "FILEHDR")
+        PhdrCmd.HasFilehdr = true;
+      else if (Tok == "PHDRS")
+        PhdrCmd.HasPhdrs = true;
+      else if (Tok == "AT")
+        PhdrCmd.LMAExpr = readParenExpr();
+      else if (Tok == "FLAGS") {
+        expect("(");
+        // Passing 0 for the value of dot is a bit of a hack. It means that
+        // we accept expressions like ".|1".
+        PhdrCmd.Flags = readExpr()(0);
+        expect(")");
+      } else
+        setError("unexpected header attribute: " + Tok);
+    } while (!Error);
+  }
+}
+
+// Reads "SEARCH_DIR(path)". The path is ignored when Config->Nostdlib is set.
+void ScriptParser::readSearchDir() {
+  expect("(");
+  StringRef Tok = next();
+  if (!Config->Nostdlib)
+    Config->SearchPaths.push_back(unquote(Tok));
+  expect(")");
+}
+
+// Reads the body of a SECTIONS command: assignments, ASSERTs and output
+// section descriptions, appended to Opt.Commands in script order.
+void ScriptParser::readSections() {
+  Opt.HasSections = true;
+  // -no-rosegment is used to avoid placing read only non-executable sections in
+  // their own segment. We do the same if SECTIONS command is present in linker
+  // script. See comment for computeFlags().
+  Config->SingleRoRx = true;
+
+  expect("{");
+  while (!Error && !consume("}")) {
+    StringRef Tok = next();
+    BaseCommand *Cmd = readProvideOrAssignment(Tok);
+    if (!Cmd) {
+      if (Tok == "ASSERT")
+        Cmd = new AssertCommand(readAssert());
+      else
+        Cmd = readOutputSectionDescription(Tok);
+    }
+    Opt.Commands.emplace_back(Cmd);
+  }
+}
+
+// Binding strength of a binary operator; higher binds tighter. Returns -1
+// for anything that is not a known binary operator.
+static int precedence(StringRef Op) {
+  return StringSwitch<int>(Op)
+      .Cases("*", "/", 5)
+      .Cases("+", "-", 4)
+      .Cases("<<", ">>", 3)
+      .Cases("<", "<=", ">", ">=", "==", "!=", 2)
+      .Cases("&", "|", 1)
+      .Default(-1);
+}
+
+// Collects tokens up to and including the closing ")" into a matcher.
+StringMatcher ScriptParser::readFilePatterns() {
+  std::vector<StringRef> V;
+  while (!Error && !consume(")"))
+    V.push_back(next());
+  return StringMatcher(V);
+}
+
+// Consumes an optional SORT* keyword and returns the matching policy, or
+// Default (consuming nothing) when the next token is not a sort keyword.
+SortSectionPolicy ScriptParser::readSortKind() {
+  if (consume("SORT") || consume("SORT_BY_NAME"))
+    return SortSectionPolicy::Name;
+  if (consume("SORT_BY_ALIGNMENT"))
+    return SortSectionPolicy::Alignment;
+  if (consume("SORT_BY_INIT_PRIORITY"))
+    return SortSectionPolicy::Priority;
+  if (consume("SORT_NONE"))
+    return SortSectionPolicy::None;
+  return SortSectionPolicy::Default;
+}
+
+// Method reads a list of sequence of excluded files and section globs given in
+// a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+
+// Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3)
+// The semantics of that is next:
+// * Include .foo.1 from every file.
+// * Include .foo.2 from every file but a.o
+// * Include .foo.3 from every file but b.o
+std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
+  std::vector<SectionPattern> Patterns;
+  while (!Error && peek() != ")") {
+    // Optional EXCLUDE_FILE(...) that applies to the globs that follow it.
+    StringMatcher Excluded;
+    if (consume("EXCLUDE_FILE")) {
+      expect("(");
+      Excluded = readFilePatterns();
+    }
+
+    // Section globs run until the list ends or the next EXCLUDE_FILE starts.
+    std::vector<StringRef> Globs;
+    while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE")
+      Globs.push_back(next());
+
+    if (Globs.empty())
+      setError("section pattern is expected");
+    else
+      Patterns.push_back({std::move(Excluded), StringMatcher(Globs)});
+  }
+  return Patterns;
+}
+
+// Reads contents of "SECTIONS" directive. That directive contains a
+// list of glob patterns for input sections. The grammar is as follows.
+//
+// <patterns> ::= <section-list>
+//              | <sort> "(" <section-list> ")"
+//              | <sort> "(" <sort> "(" <section-list> ")" ")"
+//
+// <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
+//              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
+//
+// <section-list> is parsed by readInputSectionsList().
+InputSectionDescription *
+ScriptParser::readInputSectionRules(StringRef FilePattern) {
+  auto *Desc = new InputSectionDescription(FilePattern);
+  expect("(");
+  while (!Error && !consume(")")) {
+    // An outer sort keyword opens one parenthesis; an inner one (only
+    // possible inside an outer one) opens a second.
+    SortSectionPolicy Outer = readSortKind();
+    SortSectionPolicy Inner = SortSectionPolicy::Default;
+    const bool HasOuter = Outer != SortSectionPolicy::Default;
+    if (HasOuter) {
+      expect("(");
+      Inner = readSortKind();
+    }
+    const bool HasInner = Inner != SortSectionPolicy::Default;
+    if (HasInner)
+      expect("(");
+
+    std::vector<SectionPattern> Patterns = readInputSectionsList();
+
+    // Close whatever was opened, innermost first.
+    if (HasInner)
+      expect(")");
+    if (HasOuter)
+      expect(")");
+
+    for (SectionPattern &Pat : Patterns) {
+      Pat.SortInner = Inner;
+      Pat.SortOuter = Outer;
+    }
+
+    std::move(Patterns.begin(), Patterns.end(),
+              std::back_inserter(Desc->SectionPatterns));
+  }
+  return Desc;
+}
+
+InputSectionDescription *
+ScriptParser::readInputSectionDescription(StringRef Tok) {
+  if (Tok != "KEEP")
+    return readInputSectionRules(Tok);
+
+  // Input section wildcard can be surrounded by KEEP.
+  // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
+  expect("(");
+  StringRef FilePattern = next();
+  InputSectionDescription *Kept = readInputSectionRules(FilePattern);
+  expect(")");
+  Opt.KeptSections.push_back(Kept);
+  return Kept;
+}
+
+// Accepts "(CONSTRUCTORS)" after a SORT keyword; nothing is recorded.
+void ScriptParser::readSort() {
+  expect("(");
+  expect("CONSTRUCTORS");
+  expect(")");
+}
+
+// Reads "(expr, message)" and returns an expression that evaluates expr,
+// reports message as an error when the value is zero, and yields the value.
+Expr ScriptParser::readAssert() {
+  expect("(");
+  Expr Cond = readExpr();
+  expect(",");
+  StringRef Msg = unquote(next());
+  expect(")");
+  return [=](uint64_t Dot) {
+    uint64_t Value = Cond(Dot);
+    if (!Value)
+      error(Msg);
+    return Value;
+  };
+}
+
+// Reads a FILL(expr) command. We handle the FILL command as an
+// alias for =fillexp section attribute, which is different from
+// what GNU linkers do.
+// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
+uint32_t ScriptParser::readFill() {
+  expect("(");
+  uint32_t Filler = readOutputSectionFiller(next());
+  expect(")");
+  expect(";");
+  return Filler;
+}
+
+// Reads one output section description, i.e. everything from the optional
+// address expression after the section name through the trailing phdr list
+// and "=filler". OutSec is the already-consumed section name.
+OutputSectionCommand *
+ScriptParser::readOutputSectionDescription(StringRef OutSec) {
+  OutputSectionCommand *Sec = new OutputSectionCommand(OutSec);
+  Sec->Location = getCurrentLocation();
+
+  // Read an address expression.
+  // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address
+  if (peek() != ":")
+    Sec->AddrExpr = readExpr();
+
+  expect(":");
+
+  if (consume("AT"))
+    Sec->LMAExpr = readParenExpr();
+  if (consume("ALIGN"))
+    Sec->AlignExpr = readParenExpr();
+  if (consume("SUBALIGN"))
+    Sec->SubalignExpr = readParenExpr();
+
+  // Parse constraints.
+  if (consume("ONLY_IF_RO"))
+    Sec->Constraint = ConstraintKind::ReadOnly;
+  if (consume("ONLY_IF_RW"))
+    Sec->Constraint = ConstraintKind::ReadWrite;
+  expect("{");
+
+  // Body: each alternative is tried in turn; the first one that accepts the
+  // token wins.
+  while (!Error && !consume("}")) {
+    StringRef Tok = next();
+    if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) {
+      Sec->Commands.emplace_back(Assign);
+      continue;
+    }
+    if (BytesDataCommand *Bytes = readBytesDataCommand(Tok)) {
+      Sec->Commands.emplace_back(Bytes);
+      continue;
+    }
+    if (Tok == "ASSERT") {
+      Sec->Commands.emplace_back(new AssertCommand(readAssert()));
+      expect(";");
+      continue;
+    }
+    if (Tok == "FILL") {
+      Sec->Filler = readFill();
+      continue;
+    }
+    if (Tok == "SORT") {
+      readSort();
+      continue;
+    }
+    if (peek() == "(") {
+      Sec->Commands.emplace_back(readInputSectionDescription(Tok));
+      continue;
+    }
+    setError("unknown command " + Tok);
+  }
+  Sec->Phdrs = readOutputSectionPhdrs();
+
+  // The filler may be tokenized either as "=" followed by the number or as
+  // a single "=<number>" token.
+  if (consume("="))
+    Sec->Filler = readOutputSectionFiller(next());
+  else if (peek().startswith("="))
+    Sec->Filler = readOutputSectionFiller(next().drop_front());
+
+  return Sec;
+}
+
+// Read "=<number>" where <number> is an octal/decimal/hexadecimal number.
+// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
+//
+// ld.gold is not fully compatible with ld.bfd. ld.bfd handles
+// hexstrings as blobs of arbitrary sizes, while ld.gold handles them
+// as 32-bit big-endian values. We will do the same as ld.gold does
+// because it's simpler than what ld.bfd does.
+uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) {
+  uint32_t Filler;
+  // getAsInteger() returns true on failure.
+  if (Tok.getAsInteger(0, Filler)) {
+    setError("invalid filler expression: " + Tok);
+    return 0;
+  }
+  return Filler;
+}
+
+// Reads "(sym = expr);" following PROVIDE, HIDDEN or PROVIDE_HIDDEN and tags
+// the resulting assignment with the given attributes.
+SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
+  expect("(");
+  SymbolAssignment *Assign = readAssignment(next());
+  Assign->Provide = Provide;
+  Assign->Hidden = Hidden;
+  expect(")");
+  expect(";");
+  return Assign;
+}
+
+// Tok has already been consumed. If it starts an assignment or one of the
+// PROVIDE/HIDDEN forms, parses and returns it; otherwise returns nullptr
+// without consuming anything further.
+SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) {
+  if (peek() == "=" || peek() == "+=") {
+    SymbolAssignment *Assign = readAssignment(Tok);
+    expect(";");
+    return Assign;
+  }
+  if (Tok == "PROVIDE")
+    return readProvideHidden(true, false);
+  if (Tok == "HIDDEN")
+    return readProvideHidden(false, true);
+  if (Tok == "PROVIDE_HIDDEN")
+    return readProvideHidden(true, true);
+  return nullptr;
+}
+
+// "." evaluates to the current location counter; every other name is looked
+// up through ScriptBase.
+static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) {
+  return S == "." ? Dot : ScriptBase->getSymbolValue(Loc, S);
+}
+
+// "." is never absolute; every other name is delegated to ScriptBase.
+static bool isAbsolute(StringRef S) {
+  return S != "." && ScriptBase->isAbsolute(S);
+}
+
+// Reads "= expr" or "+= expr" for the symbol Name (whose token has already
+// been consumed) and returns the new assignment command.
+SymbolAssignment *ScriptParser::readAssignment(StringRef Name) {
+  StringRef Op = next();
+  Expr E;
+  assert(Op == "=" || Op == "+=");
+  if (!consume("ABSOLUTE")) {
+    E = readExpr();
+  } else {
+    // The RHS may be something like "ABSOLUTE(.) & 0xff".
+    // Call readExpr1 to read the whole expression.
+    E = readExpr1(readParenExpr(), 0);
+    E.IsAbsolute = [] { return true; };
+  }
+  if (Op == "+=") {
+    // Rewrite "sym += expr" as "sym = sym + expr". The lambda captures the
+    // current E by value before E is overwritten with the lambda itself.
+    std::string Loc = getCurrentLocation();
+    E = [=](uint64_t Dot) {
+      return getSymbolValue(Loc, Name, Dot) + E(Dot);
+    };
+  }
+  return new SymbolAssignment(Name, E);
+}
+
+// This is an operator-precedence parser to parse a linker
+// script expression.
+Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); }
+
+// Builds the expression "L Op R". Division by zero is reported as an error
+// and evaluates to 0. Only "+" forwards IsAbsolute/Section from its
+// operands; the other operators yield a plain value expression.
+static Expr combine(StringRef Op, Expr L, Expr R) {
+  if (Op == "*")
+    return [=](uint64_t Dot) { return L(Dot) * R(Dot); };
+  if (Op == "/") {
+    return [=](uint64_t Dot) -> uint64_t {
+      uint64_t Divisor = R(Dot);
+      if (Divisor == 0) {
+        error("division by zero");
+        return 0;
+      }
+      return L(Dot) / Divisor;
+    };
+  }
+  if (Op == "+")
+    return {[=](uint64_t Dot) { return L(Dot) + R(Dot); },
+            [=] { return L.IsAbsolute() && R.IsAbsolute(); },
+            [=] {
+              const OutputSectionBase *S = L.Section();
+              return S ? S : R.Section();
+            }};
+  if (Op == "-")
+    return [=](uint64_t Dot) { return L(Dot) - R(Dot); };
+  if (Op == "<<")
+    return [=](uint64_t Dot) { return L(Dot) << R(Dot); };
+  if (Op == ">>")
+    return [=](uint64_t Dot) { return L(Dot) >> R(Dot); };
+  if (Op == "<")
+    return [=](uint64_t Dot) { return L(Dot) < R(Dot); };
+  if (Op == ">")
+    return [=](uint64_t Dot) { return L(Dot) > R(Dot); };
+  if (Op == ">=")
+    return [=](uint64_t Dot) { return L(Dot) >= R(Dot); };
+  if (Op == "<=")
+    return [=](uint64_t Dot) { return L(Dot) <= R(Dot); };
+  if (Op == "==")
+    return [=](uint64_t Dot) { return L(Dot) == R(Dot); };
+  if (Op == "!=")
+    return [=](uint64_t Dot) { return L(Dot) != R(Dot); };
+  if (Op == "&")
+    return [=](uint64_t Dot) { return L(Dot) & R(Dot); };
+  if (Op == "|")
+    return [=](uint64_t Dot) { return L(Dot) | R(Dot); };
+  llvm_unreachable("invalid operator");
+}
+
+// This is a part of the operator-precedence parser. This function
+// assumes that the remaining token stream starts with an operator.
+Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
+  while (!atEOF() && !Error) {
+    // Read an operator and an expression.
+    if (consume("?"))
+      return readTernary(Lhs);
+    StringRef Op = peek();
+    const int Prec = precedence(Op);
+    if (Prec < MinPrec)
+      return Lhs;
+    skip();
+    Expr Rhs = readPrimary();
+
+    // Evaluate the remaining part of the expression first if the
+    // next operator has greater precedence than the previous one.
+    // For example, if we have read "+" and "3", and if the next
+    // operator is "*", then we'll evaluate 3 * ... part first.
+    while (!atEOF()) {
+      const int NextPrec = precedence(peek());
+      if (NextPrec <= Prec)
+        break;
+      Rhs = readExpr1(Rhs, NextPrec);
+    }
+
+    Lhs = combine(Op, Lhs, Rhs);
+  }
+  return Lhs;
+}
+
+// Value of CONSTANT(S). An unknown name is reported as an error and
+// evaluates to 0.
+static uint64_t getConstant(StringRef S) {
+  if (S == "COMMONPAGESIZE")
+    return Target->PageSize;
+  if (S == "MAXPAGESIZE")
+    return Config->MaxPageSize;
+  error("unknown constant: " + S);
+  return 0;
+}
+
+// Parses Tok as an integer. Returns true if successful.
+// It recognizes hexadecimal (prefixed with "0x" or suffixed with "H")
+// and decimal numbers. Decimal numbers may have "K" (kilo) or
+// "M" (mega) suffixes.
+static bool readInteger(StringRef Tok, uint64_t &Result) {
+  // Negative number
+  if (Tok.startswith("-")) {
+    if (!readInteger(Tok.substr(1), Result))
+      return false;
+    // Unsigned negation: the result is the two's complement bit pattern.
+    Result = -Result;
+    return true;
+  }
+
+  // Hexadecimal (getAsInteger() returns true on failure, hence the "!").
+  if (Tok.startswith_lower("0x"))
+    return !Tok.substr(2).getAsInteger(16, Result);
+  if (Tok.endswith_lower("H"))
+    return !Tok.drop_back().getAsInteger(16, Result);
+
+  // Decimal
+  int Suffix = 1;
+  if (Tok.endswith_lower("K")) {
+    Suffix = 1024;
+    Tok = Tok.drop_back();
+  } else if (Tok.endswith_lower("M")) {
+    Suffix = 1024 * 1024;
+    Tok = Tok.drop_back();
+  }
+  if (Tok.getAsInteger(10, Result))
+    return false;
+  Result *= Suffix;
+  return true;
+}
+
+// If Tok is BYTE, SHORT, LONG or QUAD, reads the parenthesized expression
+// that follows and returns a data command of the matching width (1, 2, 4 or
+// 8 bytes). Otherwise returns nullptr without consuming anything.
+BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
+  // Switch on int so that the -1 sentinel is a real -1 rather than an
+  // unsigned value narrowed back to int.
+  int Size = StringSwitch<int>(Tok)
+                 .Case("BYTE", 1)
+                 .Case("SHORT", 2)
+                 .Case("LONG", 4)
+                 .Case("QUAD", 8)
+                 .Default(-1);
+  if (Size == -1)
+    return nullptr;
+
+  return new BytesDataCommand(readParenExpr(), Size);
+}
+
+// Reads "( token )" and returns the token unevaluated.
+StringRef ScriptParser::readParenLiteral() {
+  expect("(");
+  StringRef Tok = next();
+  expect(")");
+  return Tok;
+}
+
+// Reads a primary expression: a parenthesized expression, a unary "~" or
+// "-", a built-in function call, a numeric literal, or a symbol name
+// (including ".").
+Expr ScriptParser::readPrimary() {
+  if (peek() == "(")
+    return readParenExpr();
+
+  StringRef Tok = next();
+  // Captured by the lambdas below so that errors raised at evaluation time
+  // can still point at this place in the script.
+  std::string Location = getCurrentLocation();
+
+  if (Tok == "~") {
+    Expr E = readPrimary();
+    return [=](uint64_t Dot) { return ~E(Dot); };
+  }
+  if (Tok == "-") {
+    Expr E = readPrimary();
+    return [=](uint64_t Dot) { return -E(Dot); };
+  }
+
+  // Built-in functions are parsed here.
+  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
+  if (Tok == "ADDR") {
+    StringRef Name = readParenLiteral();
+    return {[=](uint64_t Dot) {
+              return ScriptBase->getOutputSection(Location, Name)->Addr;
+            },
+            [=] { return false; },
+            [=] { return ScriptBase->getOutputSection(Location, Name); }};
+  }
+  if (Tok == "LOADADDR") {
+    StringRef Name = readParenLiteral();
+    return [=](uint64_t Dot) {
+      return ScriptBase->getOutputSection(Location, Name)->getLMA();
+    };
+  }
+  if (Tok == "ASSERT")
+    return readAssert();
+  if (Tok == "ALIGN") {
+    // ALIGN(align) aligns dot; ALIGN(expr, align) aligns expr.
+    expect("(");
+    Expr E = readExpr();
+    if (consume(",")) {
+      Expr E2 = readExpr();
+      expect(")");
+      return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); };
+    }
+    expect(")");
+    return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
+  }
+  if (Tok == "CONSTANT") {
+    StringRef Name = readParenLiteral();
+    return [=](uint64_t Dot) { return getConstant(Name); };
+  }
+  if (Tok == "DEFINED") {
+    StringRef Name = readParenLiteral();
+    return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; };
+  }
+  if (Tok == "SEGMENT_START") {
+    // The segment name is skipped; only the default address is used.
+    expect("(");
+    skip();
+    expect(",");
+    Expr E = readExpr();
+    expect(")");
+    return [=](uint64_t Dot) { return E(Dot); };
+  }
+  if (Tok == "DATA_SEGMENT_ALIGN") {
+    // The second argument is parsed and discarded.
+    expect("(");
+    Expr E = readExpr();
+    expect(",");
+    readExpr();
+    expect(")");
+    return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
+  }
+  if (Tok == "DATA_SEGMENT_END") {
+    expect("(");
+    expect(".");
+    expect(")");
+    return [](uint64_t Dot) { return Dot; };
+  }
+  // GNU linkers implement more complicated logic to handle
+  // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to
+  // the next page boundary for simplicity.
+  if (Tok == "DATA_SEGMENT_RELRO_END") {
+    expect("(");
+    readExpr();
+    expect(",");
+    readExpr();
+    expect(")");
+    return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); };
+  }
+  if (Tok == "SIZEOF") {
+    StringRef Name = readParenLiteral();
+    return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); };
+  }
+  if (Tok == "ALIGNOF") {
+    StringRef Name = readParenLiteral();
+    return [=](uint64_t Dot) {
+      return ScriptBase->getOutputSection(Location, Name)->Addralign;
+    };
+  }
+  if (Tok == "SIZEOF_HEADERS")
+    return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); };
+
+  // Tok is a literal number.
+  uint64_t V;
+  if (readInteger(Tok, V))
+    return [=](uint64_t Dot) { return V; };
+
+  // Tok is a symbol name.
+  if (Tok != "." && !isValidCIdentifier(Tok))
+    setError("malformed number: " + Tok);
+  return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); },
+          [=] { return isAbsolute(Tok); },
+          [=] { return ScriptBase->getSymbolSection(Tok); }};
+}
+
+// Reads "L : R" after "Cond ?" has been consumed.
+Expr ScriptParser::readTernary(Expr Cond) {
+  Expr L = readExpr();
+  expect(":");
+  Expr R = readExpr();
+  return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); };
+}
+
+// Reads "( expr )".
+Expr ScriptParser::readParenExpr() {
+  expect("(");
+  Expr E = readExpr();
+  expect(")");
+  return E;
+}
+
+// Reads the ":phdr" list that may follow an output section description.
+// Both ":name" as one token and ":" followed by "name" are accepted.
+std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
+  std::vector<StringRef> Phdrs;
+  while (!Error && peek().startswith(":")) {
+    StringRef Tok = next();
+    Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1));
+  }
+  return Phdrs;
+}
+
+// Read a program header type name. The next token must be a
+// name of a program header type or a constant (e.g. "0x3").
+unsigned ScriptParser::readPhdrType() {
+  StringRef Tok = next();
+  // A numeric token is used as the type value directly.
+  uint64_t Val;
+  if (readInteger(Tok, Val))
+    return Val;
+
+  unsigned Ret = StringSwitch<unsigned>(Tok)
+                     .Case("PT_NULL", PT_NULL)
+                     .Case("PT_LOAD", PT_LOAD)
+                     .Case("PT_DYNAMIC", PT_DYNAMIC)
+                     .Case("PT_INTERP", PT_INTERP)
+                     .Case("PT_NOTE", PT_NOTE)
+                     .Case("PT_SHLIB", PT_SHLIB)
+                     .Case("PT_PHDR", PT_PHDR)
+                     .Case("PT_TLS", PT_TLS)
+                     .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
+                     .Case("PT_GNU_STACK", PT_GNU_STACK)
+                     .Case("PT_GNU_RELRO", PT_GNU_RELRO)
+                     .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE)
+                     .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED)
+                     .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA)
+                     .Default(-1);
+
+  // (unsigned)-1 is the "no match" sentinel from the switch above.
+  if (Ret == (unsigned)-1) {
+    setError("invalid program header type: " + Tok);
+    return PT_NULL;
+  }
+  return Ret;
+}
+
+// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
+// The opening "{" has already been consumed by the caller.
+void ScriptParser::readAnonymousDeclaration() {
+  // Read global symbols first. "global:" is default, so if there's
+  // no label, we assume global symbols.
+  if (consume("global:") || peek() != "local:")
+    Config->VersionScriptGlobals = readSymbols();
+
+  // Next, read local symbols.
+  if (consume("local:")) {
+    if (consume("*")) {
+      Config->DefaultSymbolVersion = VER_NDX_LOCAL;
+      expect(";");
+    } else {
+      setError("local symbol list for anonymous version is not supported");
+    }
+  }
+  expect("}");
+  expect(";");
+}
+
+// Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };".
+// VerStr and the opening "{" have already been consumed by the caller.
+void ScriptParser::readVersionDeclaration(StringRef VerStr) {
+  // Identifiers start at 2 because 0 and 1 are reserved
+  // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants.
+  uint16_t VersionId = Config->VersionDefinitions.size() + 2;
+  Config->VersionDefinitions.push_back({VerStr, VersionId});
+
+  // Read global symbols.
+  if (consume("global:") || peek() != "local:")
+    Config->VersionDefinitions.back().Globals = readSymbols();
+
+  // Read local symbols.
+  if (consume("local:")) {
+    if (consume("*")) {
+      Config->DefaultSymbolVersion = VER_NDX_LOCAL;
+      expect(";");
+    } else {
+      for (SymbolVersion V : readSymbols())
+        Config->VersionScriptLocals.push_back(V);
+    }
+  }
+  expect("}");
+
+  // Each version may have a parent version. For example, "Ver2"
+  // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
+  // as a parent. This version hierarchy is, probably against your
+  // instinct, purely for hint; the runtime doesn't care about it
+  // at all. In LLD, we simply ignore it.
+  if (peek() != ";")
+    skip();
+  expect(";");
+}
+
+// Reads a list of symbols for a version script. Stops (without consuming)
+// at "}" or "local:".
+std::vector<SymbolVersion> ScriptParser::readSymbols() {
+  std::vector<SymbolVersion> Ret;
+  for (;;) {
+    if (consume("extern")) {
+      for (SymbolVersion V : readVersionExtern())
+        Ret.push_back(V);
+      continue;
+    }
+
+    if (peek() == "}" || peek() == "local:" || Error)
+      break;
+    StringRef Tok = next();
+    Ret.push_back({unquote(Tok), false, hasWildcard(Tok)});
+    expect(";");
+  }
+  return Ret;
+}
+
+// Reads an "extern C++" directive, e.g.,
+// "extern "C++" { ns::*; "f(int, double)"; };"
+std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
+  StringRef Tok = next();
+  bool IsCXX = Tok == "\"C++\"";
+  if (!IsCXX && Tok != "\"C\"")
+    setError("Unknown language");
+  expect("{");
+
+  std::vector<SymbolVersion> Ret;
+  while (!Error && peek() != "}") {
+    StringRef Tok = next();
+    // A quoted name is taken literally, so it never counts as a wildcard.
+    bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok);
+    Ret.push_back({unquote(Tok), IsCXX, HasWildcard});
+    expect(";");
+  }
+
+  expect("}");
+  expect(";");
+  return Ret;
+}
+
+// Public entry points: each parses the given buffer with a temporary parser.
+void elf::readLinkerScript(MemoryBufferRef MB) {
+  ScriptParser(MB).readLinkerScript();
+}
+
+void elf::readVersionScript(MemoryBufferRef MB) {
+  ScriptParser(MB).readVersionScript();
+}
+
+void elf::readDynamicList(MemoryBufferRef MB) {
+  ScriptParser(MB).readDynamicList();
+}
+
+template class elf::LinkerScript<ELF32LE>;
+template class elf::LinkerScript<ELF32BE>;
+template class elf::LinkerScript<ELF64LE>;
+template class elf::LinkerScript<ELF64BE>;
diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.h b/contrib/llvm/tools/lld/ELF/LinkerScript.h
new file mode 100644
index 000000000000..505162f0ab43
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/LinkerScript.h
@@ -0,0 +1,298 @@
+//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_LINKER_SCRIPT_H
+#define LLD_ELF_LINKER_SCRIPT_H
+
+#include "Config.h"
+#include "Strings.h"
+#include "Writer.h"
+#include "lld/Core/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <vector>
+
+namespace lld {
+namespace elf {
+
+class DefinedCommon;
+class ScriptParser;
+class SymbolBody;
+template <class ELFT> class InputSectionBase;
+template <class ELFT> class InputSection;
+class OutputSectionBase;
+template <class ELFT> class OutputSectionFactory;
+class InputSectionData;
+
+// This represents an expression in the linker script.
+// ScriptParser::readExpr reads an expression and returns an Expr.
+// Later, we evaluate the expression by calling the function
+// with the value of special context variable ".".
+struct Expr {
+  std::function<uint64_t(uint64_t)> Val;
+  std::function<bool()> IsAbsolute;
+
+  // If expression is section-relative the function below is used
+  // to get the output section pointer.
+  std::function<const OutputSectionBase *()> Section;
+
+  uint64_t operator()(uint64_t Dot) const { return Val(Dot); }
+  // True if this expression has an evaluation function, i.e. is not empty.
+  operator bool() const { return (bool)Val; }
+
+  Expr(std::function<uint64_t(uint64_t)> Val, std::function<bool()> IsAbsolute,
+       std::function<const OutputSectionBase *()> Section)
+      : Val(Val), IsAbsolute(IsAbsolute), Section(Section) {}
+  // Wraps a bare callable: such an expression reports itself as absolute
+  // and as belonging to no section.
+  template <typename T>
+  Expr(T V) : Expr(V, [] { return true; }, [] { return nullptr; }) {}
+  Expr() : Expr(nullptr) {}
+};
+
+// Parses a linker script. Calling this function updates
+// Config and ScriptConfig.
+void readLinkerScript(MemoryBufferRef MB);
+
+// Parses a version script.
+void readVersionScript(MemoryBufferRef MB);
+
+// Parses a dynamic list.
+void readDynamicList(MemoryBufferRef MB);
+
+// This enum is used to implement linker script SECTIONS command.
+// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
+enum SectionsCommandKind {
+  AssignmentKind, // . = expr or <sym> = expr
+  OutputSectionKind,
+  InputSectionKind,
+  AssertKind,   // ASSERT(expr)
+  BytesDataKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
+};
+
+// Common base of all script commands. Kind holds a SectionsCommandKind
+// value identifying the concrete command type.
+struct BaseCommand {
+  BaseCommand(int K) : Kind(K) {}
+
+  virtual ~BaseCommand() = default;
+
+  int Kind;
+};
+
+// This represents ". = <expr>" or "<symbol> = <expr>".
+struct SymbolAssignment : BaseCommand {
+  SymbolAssignment(StringRef Name, Expr E)
+      : BaseCommand(AssignmentKind), Name(Name), Expression(E) {}
+
+  static bool classof(const BaseCommand *C);
+
+  // The LHS of an expression. Name is either a symbol name or ".".
+  StringRef Name;
+  SymbolBody *Sym = nullptr;
+
+  // The RHS of an expression.
+  Expr Expression;
+
+  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
+  bool Provide = false;
+  bool Hidden = false;
+};
+
+// Linker scripts allow additional constraints to be put on output sections.
+// If an output section is marked as ONLY_IF_RO, the section is created
+// only if its input sections are read-only.
Likewise, an output section +// with ONLY_IF_RW is created if all input sections are RW. +enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; + +struct OutputSectionCommand : BaseCommand { + OutputSectionCommand(StringRef Name) + : BaseCommand(OutputSectionKind), Name(Name) {} + + static bool classof(const BaseCommand *C); + + StringRef Name; + Expr AddrExpr; + Expr AlignExpr; + Expr LMAExpr; + Expr SubalignExpr; + std::vector<std::unique_ptr<BaseCommand>> Commands; + std::vector<StringRef> Phdrs; + uint32_t Filler = 0; + ConstraintKind Constraint = ConstraintKind::NoConstraint; + std::string Location; +}; + +// This struct represents one section match pattern in SECTIONS() command. +// It can optionally have negative match pattern for EXCLUDED_FILE command. +// Also it may be surrounded with SORT() command, so contains sorting rules. +struct SectionPattern { + SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2) + : ExcludedFilePat(Pat1), SectionPat(Pat2) {} + + StringMatcher ExcludedFilePat; + StringMatcher SectionPat; + SortSectionPolicy SortOuter; + SortSectionPolicy SortInner; +}; + +struct InputSectionDescription : BaseCommand { + InputSectionDescription(StringRef FilePattern) + : BaseCommand(InputSectionKind), FilePat(FilePattern) {} + + static bool classof(const BaseCommand *C); + + StringMatcher FilePat; + + // Input sections that matches at least one of SectionPatterns + // will be associated with this InputSectionDescription. + std::vector<SectionPattern> SectionPatterns; + + std::vector<InputSectionData *> Sections; +}; + +// Represents an ASSERT(). +struct AssertCommand : BaseCommand { + AssertCommand(Expr E) : BaseCommand(AssertKind), Expression(E) {} + + static bool classof(const BaseCommand *C); + + Expr Expression; +}; + +// Represents BYTE(), SHORT(), LONG(), or QUAD(). 
+struct BytesDataCommand : BaseCommand { + BytesDataCommand(Expr E, unsigned Size) + : BaseCommand(BytesDataKind), Expression(E), Size(Size) {} + + static bool classof(const BaseCommand *C); + + Expr Expression; + unsigned Offset; + unsigned Size; +}; + +struct PhdrsCommand { + StringRef Name; + unsigned Type; + bool HasFilehdr; + bool HasPhdrs; + unsigned Flags; + Expr LMAExpr; +}; + +class LinkerScriptBase { +protected: + ~LinkerScriptBase() = default; + +public: + virtual uint64_t getHeaderSize() = 0; + virtual uint64_t getSymbolValue(const Twine &Loc, StringRef S) = 0; + virtual bool isDefined(StringRef S) = 0; + virtual bool isAbsolute(StringRef S) = 0; + virtual const OutputSectionBase *getSymbolSection(StringRef S) = 0; + virtual const OutputSectionBase *getOutputSection(const Twine &Loc, + StringRef S) = 0; + virtual uint64_t getOutputSectionSize(StringRef S) = 0; +}; + +// ScriptConfiguration holds linker script parse results. +struct ScriptConfiguration { + // Used to assign addresses to sections. + std::vector<std::unique_ptr<BaseCommand>> Commands; + + // Used to assign sections to headers. + std::vector<PhdrsCommand> PhdrsCommands; + + bool HasSections = false; + + // List of section patterns specified with KEEP commands. They will + // be kept even if they are unused and --gc-sections is specified. + std::vector<InputSectionDescription *> KeptSections; +}; + +extern ScriptConfiguration *ScriptConfig; + +// This is a runner of the linker script. 
+template <class ELFT> class LinkerScript final : public LinkerScriptBase { + typedef typename ELFT::uint uintX_t; + +public: + LinkerScript(); + ~LinkerScript(); + + void processCommands(OutputSectionFactory<ELFT> &Factory); + void addOrphanSections(OutputSectionFactory<ELFT> &Factory); + void removeEmptyCommands(); + void adjustSectionsBeforeSorting(); + void adjustSectionsAfterSorting(); + + std::vector<PhdrEntry> createPhdrs(); + bool ignoreInterpSection(); + + uint32_t getFiller(StringRef Name); + void writeDataBytes(StringRef Name, uint8_t *Buf); + bool hasLMA(StringRef Name); + bool shouldKeep(InputSectionBase<ELFT> *S); + void assignOffsets(OutputSectionCommand *Cmd); + void placeOrphanSections(); + void assignAddresses(std::vector<PhdrEntry> &Phdrs); + bool hasPhdrsCommands(); + uint64_t getHeaderSize() override; + uint64_t getSymbolValue(const Twine &Loc, StringRef S) override; + bool isDefined(StringRef S) override; + bool isAbsolute(StringRef S) override; + const OutputSectionBase *getSymbolSection(StringRef S) override; + const OutputSectionBase *getOutputSection(const Twine &Loc, + StringRef S) override; + uint64_t getOutputSectionSize(StringRef S) override; + + std::vector<OutputSectionBase *> *OutputSections; + + int getSectionIndex(StringRef Name); + +private: + void computeInputSections(InputSectionDescription *); + + void addSection(OutputSectionFactory<ELFT> &Factory, + InputSectionBase<ELFT> *Sec, StringRef Name); + void discard(ArrayRef<InputSectionBase<ELFT> *> V); + + std::vector<InputSectionBase<ELFT> *> + createInputSectionList(OutputSectionCommand &Cmd); + + // "ScriptConfig" is a bit too long, so define a short name for it. 
+ ScriptConfiguration &Opt = *ScriptConfig; + + std::vector<size_t> getPhdrIndices(StringRef SectionName); + size_t getPhdrIndex(const Twine &Loc, StringRef PhdrName); + + uintX_t Dot; + uintX_t LMAOffset = 0; + OutputSectionBase *CurOutSec = nullptr; + uintX_t ThreadBssOffset = 0; + void switchTo(OutputSectionBase *Sec); + void flush(); + void output(InputSection<ELFT> *Sec); + void process(BaseCommand &Base); + llvm::DenseSet<OutputSectionBase *> AlreadyOutputOS; + llvm::DenseSet<InputSectionData *> AlreadyOutputIS; +}; + +// Variable template is a C++14 feature, so we can't template +// a global variable. Use a struct to workaround. +template <class ELFT> struct Script { static LinkerScript<ELFT> *X; }; +template <class ELFT> LinkerScript<ELFT> *Script<ELFT>::X; + +extern LinkerScriptBase *ScriptBase; + +} // end namespace elf +} // end namespace lld + +#endif // LLD_ELF_LINKER_SCRIPT_H diff --git a/contrib/llvm/tools/lld/ELF/MarkLive.cpp b/contrib/llvm/tools/lld/ELF/MarkLive.cpp new file mode 100644 index 000000000000..8d129fc3ff13 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/MarkLive.cpp @@ -0,0 +1,255 @@ +//===- MarkLive.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements --gc-sections, which is a feature to remove unused +// sections from output. Unused sections are sections that are not reachable +// from known GC-root symbols or sections. Naturally the feature is +// implemented as a mark-sweep garbage collector. +// +// Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off +// by default. Starting with GC-root symbols or sections, markLive function +// defined in this file visits all reachable sections to set their Live +// bits. 
Writer will then ignore sections whose Live bits are off, so that +// such sections are not included into output. +// +//===----------------------------------------------------------------------===// + +#include "InputSection.h" +#include "LinkerScript.h" +#include "OutputSections.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "Target.h" +#include "Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/ELF.h" +#include <functional> +#include <vector> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +namespace { +// A resolved relocation. The Sec and Offset fields are set if the relocation +// was resolved to an offset within a section. +template <class ELFT> struct ResolvedReloc { + InputSectionBase<ELFT> *Sec; + typename ELFT::uint Offset; +}; +} // end anonymous namespace + +template <class ELFT> +static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec, + const typename ELFT::Rel &Rel) { + return Target->getImplicitAddend(Sec.Data.begin() + Rel.r_offset, + Rel.getType(Config->Mips64EL)); +} + +template <class ELFT> +static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec, + const typename ELFT::Rela &Rel) { + return Rel.r_addend; +} + +template <class ELFT, class RelT> +static ResolvedReloc<ELFT> resolveReloc(InputSectionBase<ELFT> &Sec, + RelT &Rel) { + SymbolBody &B = Sec.getFile()->getRelocTargetSym(Rel); + auto *D = dyn_cast<DefinedRegular<ELFT>>(&B); + if (!D || !D->Section) + return {nullptr, 0}; + typename ELFT::uint Offset = D->Value; + if (D->isSection()) + Offset += getAddend(Sec, Rel); + return {D->Section->Repl, Offset}; +} + +// Calls Fn for each section that Sec refers to via relocations. 
+template <class ELFT> +static void forEachSuccessor(InputSection<ELFT> &Sec, + std::function<void(ResolvedReloc<ELFT>)> Fn) { + if (Sec.AreRelocsRela) { + for (const typename ELFT::Rela &Rel : Sec.relas()) + Fn(resolveReloc(Sec, Rel)); + } else { + for (const typename ELFT::Rel &Rel : Sec.rels()) + Fn(resolveReloc(Sec, Rel)); + } + if (Sec.DependentSection) + Fn({Sec.DependentSection, 0}); +} + +// The .eh_frame section is an unfortunate special case. +// The section is divided in CIEs and FDEs and the relocations it can have are +// * CIEs can refer to a personality function. +// * FDEs can refer to a LSDA +// * FDEs refer to the function they contain information about +// The last kind of relocation cannot keep the referred section alive, or they +// would keep everything alive in a common object file. In fact, each FDE is +// alive if the section it refers to is alive. +// To keep things simple, in here we just ignore the last relocation kind. The +// other two keep the referred section alive. +// +// A possible improvement would be to fully process .eh_frame in the middle of +// the gc pass. With that we would be able to also gc some sections holding +// LSDAs and personality functions if we found that they were unused. +template <class ELFT, class RelTy> +static void +scanEhFrameSection(EhInputSection<ELFT> &EH, ArrayRef<RelTy> Rels, + std::function<void(ResolvedReloc<ELFT>)> Enqueue) { + const endianness E = ELFT::TargetEndianness; + for (unsigned I = 0, N = EH.Pieces.size(); I < N; ++I) { + EhSectionPiece &Piece = EH.Pieces[I]; + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI == (unsigned)-1) + continue; + if (read32<E>(Piece.data().data() + 4) == 0) { + // This is a CIE, we only need to worry about the first relocation. It is + // known to point to the personality function. + Enqueue(resolveReloc(EH, Rels[FirstRelI])); + continue; + } + // This is a FDE. The relocations point to the described function or to + // a LSDA. 
We only need to keep the LSDA alive, so ignore anything that + // points to executable sections. + typename ELFT::uint PieceEnd = Piece.InputOff + Piece.size(); + for (unsigned I2 = FirstRelI, N2 = Rels.size(); I2 < N2; ++I2) { + const RelTy &Rel = Rels[I2]; + if (Rel.r_offset >= PieceEnd) + break; + ResolvedReloc<ELFT> R = resolveReloc(EH, Rels[I2]); + if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) + continue; + if (R.Sec->Flags & SHF_EXECINSTR) + continue; + Enqueue({R.Sec, 0}); + } + } +} + +template <class ELFT> +static void +scanEhFrameSection(EhInputSection<ELFT> &EH, + std::function<void(ResolvedReloc<ELFT>)> Enqueue) { + if (!EH.NumRelocations) + return; + + // Unfortunately we need to split .eh_frame early since some relocations in + // .eh_frame keep other section alive and some don't. + EH.split(); + + if (EH.AreRelocsRela) + scanEhFrameSection(EH, EH.relas(), Enqueue); + else + scanEhFrameSection(EH, EH.rels(), Enqueue); +} + +// We do not garbage-collect two types of sections: +// 1) Sections used by the loader (.init, .fini, .ctors, .dtors or .jcr) +// 2) Non-allocatable sections which typically contain debugging information +template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) { + switch (Sec->Type) { + case SHT_FINI_ARRAY: + case SHT_INIT_ARRAY: + case SHT_NOTE: + case SHT_PREINIT_ARRAY: + return true; + default: + if (!(Sec->Flags & SHF_ALLOC)) + return true; + + // We do not want to reclaim sections if they can be referred + // by __start_* and __stop_* symbols. + StringRef S = Sec->Name; + if (isValidCIdentifier(S)) + return true; + + return S.startswith(".ctors") || S.startswith(".dtors") || + S.startswith(".init") || S.startswith(".fini") || + S.startswith(".jcr"); + } +} + +// This is the main function of the garbage collector. +// Starting from GC-root sections, this function visits all reachable +// sections to set their "Live" bits. 
+template <class ELFT> void elf::markLive() { + SmallVector<InputSection<ELFT> *, 256> Q; + + auto Enqueue = [&](ResolvedReloc<ELFT> R) { + // Skip over discarded sections. This in theory shouldn't happen, because + // the ELF spec doesn't allow a relocation to point to a deduplicated + // COMDAT section directly. Unfortunately this happens in practice (e.g. + // .eh_frame) so we need to add a check. + if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) + return; + + // We don't gc non alloc sections. + if (!(R.Sec->Flags & SHF_ALLOC)) + return; + + // Usually, a whole section is marked as live or dead, but in mergeable + // (splittable) sections, each piece of data has independent liveness bit. + // So we explicitly tell it which offset is in use. + if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(R.Sec)) + MS->markLiveAt(R.Offset); + + if (R.Sec->Live) + return; + R.Sec->Live = true; + // Add input section to the queue. + if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(R.Sec)) + Q.push_back(S); + }; + + auto MarkSymbol = [&](const SymbolBody *Sym) { + if (auto *D = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym)) + Enqueue({D->Section, D->Value}); + }; + + // Add GC root symbols. + MarkSymbol(Symtab<ELFT>::X->find(Config->Entry)); + MarkSymbol(Symtab<ELFT>::X->find(Config->Init)); + MarkSymbol(Symtab<ELFT>::X->find(Config->Fini)); + for (StringRef S : Config->Undefined) + MarkSymbol(Symtab<ELFT>::X->find(S)); + + // Preserve externally-visible symbols if the symbols defined by this + // file can interrupt other ELF file's symbols at runtime. + for (const Symbol *S : Symtab<ELFT>::X->getSymbols()) + if (S->includeInDynsym()) + MarkSymbol(S->body()); + + // Preserve special sections and those which are specified in linker + // script KEEP command. + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + // .eh_frame is always marked as live now, but also it can reference to + // sections that contain personality. 
We preserve all non-text sections + // referred by .eh_frame here. + if (auto *EH = dyn_cast_or_null<EhInputSection<ELFT>>(Sec)) + scanEhFrameSection<ELFT>(*EH, Enqueue); + if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec)) + Enqueue({Sec, 0}); + } + + // Mark all reachable sections. + while (!Q.empty()) + forEachSuccessor<ELFT>(*Q.pop_back_val(), Enqueue); +} + +template void elf::markLive<ELF32LE>(); +template void elf::markLive<ELF32BE>(); +template void elf::markLive<ELF64LE>(); +template void elf::markLive<ELF64BE>(); diff --git a/contrib/llvm/tools/lld/ELF/Memory.h b/contrib/llvm/tools/lld/ELF/Memory.h new file mode 100644 index 000000000000..e5a04ed1e5a8 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Memory.h @@ -0,0 +1,67 @@ +//===- Memory.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines arena allocators. +// +// Almost all large objects, such as files, sections or symbols, are +// used for the entire lifetime of the linker once they are created. +// This usage characteristic makes arena allocator an attractive choice +// where the entire linker is one arena. With an arena, newly created +// objects belong to the arena and freed all at once when everything is done. +// Arena allocators are efficient and easy to understand. +// Most objects are allocated using the arena allocators defined by this file. +// +// If you edit this file, please edit COFF/Memory.h too. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_MEMORY_H +#define LLD_ELF_MEMORY_H + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" +#include <vector> + +namespace lld { +namespace elf { + +// Use this arena if your object doesn't have a destructor. +extern llvm::BumpPtrAllocator BAlloc; +extern llvm::StringSaver Saver; + +// These two classes are hack to keep track of all +// SpecificBumpPtrAllocator instances. +struct SpecificAllocBase { + SpecificAllocBase() { Instances.push_back(this); } + virtual ~SpecificAllocBase() = default; + virtual void reset() = 0; + static std::vector<SpecificAllocBase *> Instances; +}; + +template <class T> struct SpecificAlloc : public SpecificAllocBase { + void reset() override { Alloc.DestroyAll(); } + llvm::SpecificBumpPtrAllocator<T> Alloc; +}; + +// Use this arena if your object has a destructor. +// Your destructor will be invoked from freeArena(). +template <typename T, typename... U> T *make(U &&... Args) { + static SpecificAlloc<T> Alloc; + return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...); +} + +inline void freeArena() { + for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances) + Alloc->reset(); + BAlloc.Reset(); +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Mips.cpp b/contrib/llvm/tools/lld/ELF/Mips.cpp new file mode 100644 index 000000000000..ac65672b70fc --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Mips.cpp @@ -0,0 +1,369 @@ +//===- Mips.cpp ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains a helper function for the Writer. 
+// +//===---------------------------------------------------------------------===// + +#include "Error.h" +#include "InputFiles.h" +#include "SymbolTable.h" +#include "Writer.h" + +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/MipsABIFlags.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elf; + +namespace { +struct ArchTreeEdge { + uint32_t Child; + uint32_t Parent; +}; + +struct FileFlags { + StringRef Filename; + uint32_t Flags; +}; +} + +static StringRef getAbiName(uint32_t Flags) { + switch (Flags) { + case 0: + return "n64"; + case EF_MIPS_ABI2: + return "n32"; + case EF_MIPS_ABI_O32: + return "o32"; + case EF_MIPS_ABI_O64: + return "o64"; + case EF_MIPS_ABI_EABI32: + return "eabi32"; + case EF_MIPS_ABI_EABI64: + return "eabi64"; + default: + return "unknown"; + } +} + +static StringRef getNanName(bool IsNan2008) { + return IsNan2008 ? "2008" : "legacy"; +} + +static StringRef getFpName(bool IsFp64) { return IsFp64 ? 
"64" : "32"; } + +static void checkFlags(ArrayRef<FileFlags> Files) { + uint32_t ABI = Files[0].Flags & (EF_MIPS_ABI | EF_MIPS_ABI2); + bool Nan = Files[0].Flags & EF_MIPS_NAN2008; + bool Fp = Files[0].Flags & EF_MIPS_FP64; + + for (const FileFlags &F : Files.slice(1)) { + uint32_t ABI2 = F.Flags & (EF_MIPS_ABI | EF_MIPS_ABI2); + if (ABI != ABI2) + error("target ABI '" + getAbiName(ABI) + "' is incompatible with '" + + getAbiName(ABI2) + "': " + F.Filename); + + bool Nan2 = F.Flags & EF_MIPS_NAN2008; + if (Nan != Nan2) + error("target -mnan=" + getNanName(Nan) + " is incompatible with -mnan=" + + getNanName(Nan2) + ": " + F.Filename); + + bool Fp2 = F.Flags & EF_MIPS_FP64; + if (Fp != Fp2) + error("target -mfp" + getFpName(Fp) + " is incompatible with -mfp" + + getFpName(Fp2) + ": " + F.Filename); + } +} + +static uint32_t getMiscFlags(ArrayRef<FileFlags> Files) { + uint32_t Ret = 0; + for (const FileFlags &F : Files) + Ret |= F.Flags & + (EF_MIPS_ABI | EF_MIPS_ABI2 | EF_MIPS_ARCH_ASE | EF_MIPS_NOREORDER | + EF_MIPS_MICROMIPS | EF_MIPS_NAN2008 | EF_MIPS_32BITMODE); + return Ret; +} + +static uint32_t getPicFlags(ArrayRef<FileFlags> Files) { + // Check PIC/non-PIC compatibility. + bool IsPic = Files[0].Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + for (const FileFlags &F : Files.slice(1)) { + bool IsPic2 = F.Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + if (IsPic && !IsPic2) + warn("linking abicalls code with non-abicalls file: " + F.Filename); + if (!IsPic && IsPic2) + warn("linking non-abicalls code with abicalls file: " + F.Filename); + } + + // Compute the result PIC/non-PIC flag. + uint32_t Ret = Files[0].Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + for (const FileFlags &F : Files.slice(1)) + Ret &= F.Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + + // PIC code is inherently CPIC and may not set CPIC flag explicitly. 
+ if (Ret & EF_MIPS_PIC) + Ret |= EF_MIPS_CPIC; + return Ret; +} + +static ArchTreeEdge ArchTree[] = { + // MIPS32R6 and MIPS64R6 are not compatible with other extensions + // MIPS64R2 extensions. + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_OCTEON3, EF_MIPS_ARCH_64R2}, + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_OCTEON2, EF_MIPS_ARCH_64R2}, + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_OCTEON, EF_MIPS_ARCH_64R2}, + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_LS3A, EF_MIPS_ARCH_64R2}, + // MIPS64 extensions. + {EF_MIPS_ARCH_64 | EF_MIPS_MACH_SB1, EF_MIPS_ARCH_64}, + {EF_MIPS_ARCH_64 | EF_MIPS_MACH_XLR, EF_MIPS_ARCH_64}, + {EF_MIPS_ARCH_64R2, EF_MIPS_ARCH_64}, + // MIPS V extensions. + {EF_MIPS_ARCH_64, EF_MIPS_ARCH_5}, + // R5000 extensions. + {EF_MIPS_ARCH_4 | EF_MIPS_MACH_5500, EF_MIPS_ARCH_4 | EF_MIPS_MACH_5400}, + // MIPS IV extensions. + {EF_MIPS_ARCH_4 | EF_MIPS_MACH_5400, EF_MIPS_ARCH_4}, + {EF_MIPS_ARCH_4 | EF_MIPS_MACH_9000, EF_MIPS_ARCH_4}, + {EF_MIPS_ARCH_5, EF_MIPS_ARCH_4}, + // VR4100 extensions. + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4111, EF_MIPS_ARCH_3 | EF_MIPS_MACH_4100}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4120, EF_MIPS_ARCH_3 | EF_MIPS_MACH_4100}, + // MIPS III extensions. + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4010, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4100, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4650, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_5900, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_LS2E, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_LS2F, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_4, EF_MIPS_ARCH_3}, + // MIPS32 extensions. + {EF_MIPS_ARCH_32R2, EF_MIPS_ARCH_32}, + // MIPS II extensions. + {EF_MIPS_ARCH_3, EF_MIPS_ARCH_2}, + {EF_MIPS_ARCH_32, EF_MIPS_ARCH_2}, + // MIPS I extensions. 
+ {EF_MIPS_ARCH_1 | EF_MIPS_MACH_3900, EF_MIPS_ARCH_1}, + {EF_MIPS_ARCH_2, EF_MIPS_ARCH_1}, +}; + +static bool isArchMatched(uint32_t New, uint32_t Res) { + if (New == Res) + return true; + if (New == EF_MIPS_ARCH_32 && isArchMatched(EF_MIPS_ARCH_64, Res)) + return true; + if (New == EF_MIPS_ARCH_32R2 && isArchMatched(EF_MIPS_ARCH_64R2, Res)) + return true; + for (const auto &Edge : ArchTree) { + if (Res == Edge.Child) { + Res = Edge.Parent; + if (Res == New) + return true; + } + } + return false; +} + +static StringRef getMachName(uint32_t Flags) { + switch (Flags & EF_MIPS_MACH) { + case EF_MIPS_MACH_NONE: + return ""; + case EF_MIPS_MACH_3900: + return "r3900"; + case EF_MIPS_MACH_4010: + return "r4010"; + case EF_MIPS_MACH_4100: + return "r4100"; + case EF_MIPS_MACH_4650: + return "r4650"; + case EF_MIPS_MACH_4120: + return "r4120"; + case EF_MIPS_MACH_4111: + return "r4111"; + case EF_MIPS_MACH_5400: + return "vr5400"; + case EF_MIPS_MACH_5900: + return "vr5900"; + case EF_MIPS_MACH_5500: + return "vr5500"; + case EF_MIPS_MACH_9000: + return "rm9000"; + case EF_MIPS_MACH_LS2E: + return "loongson2e"; + case EF_MIPS_MACH_LS2F: + return "loongson2f"; + case EF_MIPS_MACH_LS3A: + return "loongson3a"; + case EF_MIPS_MACH_OCTEON: + return "octeon"; + case EF_MIPS_MACH_OCTEON2: + return "octeon2"; + case EF_MIPS_MACH_OCTEON3: + return "octeon3"; + case EF_MIPS_MACH_SB1: + return "sb1"; + case EF_MIPS_MACH_XLR: + return "xlr"; + default: + return "unknown machine"; + } +} + +static StringRef getArchName(uint32_t Flags) { + StringRef S = getMachName(Flags); + if (!S.empty()) + return S; + + switch (Flags & EF_MIPS_ARCH) { + case EF_MIPS_ARCH_1: + return "mips1"; + case EF_MIPS_ARCH_2: + return "mips2"; + case EF_MIPS_ARCH_3: + return "mips3"; + case EF_MIPS_ARCH_4: + return "mips4"; + case EF_MIPS_ARCH_5: + return "mips5"; + case EF_MIPS_ARCH_32: + return "mips32"; + case EF_MIPS_ARCH_64: + return "mips64"; + case EF_MIPS_ARCH_32R2: + return "mips32r2"; + case 
EF_MIPS_ARCH_64R2: + return "mips64r2"; + case EF_MIPS_ARCH_32R6: + return "mips32r6"; + case EF_MIPS_ARCH_64R6: + return "mips64r6"; + default: + return "unknown arch"; + } +} + +// There are (arguably too) many MIPS ISAs out there. Their relationships +// can be represented as a forest. If all input files have ISAs which +// reachable by repeated proceeding from the single child to the parent, +// these input files are compatible. In that case we need to return "highest" +// ISA. If there are incompatible input files, we show an error. +// For example, mips1 is a "parent" of mips2 and such files are compatible. +// Output file gets EF_MIPS_ARCH_2 flag. From the other side mips3 and mips32 +// are incompatible because nor mips3 is a parent for misp32, nor mips32 +// is a parent for mips3. +static uint32_t getArchFlags(ArrayRef<FileFlags> Files) { + uint32_t Ret = Files[0].Flags & (EF_MIPS_ARCH | EF_MIPS_MACH); + + for (const FileFlags &F : Files.slice(1)) { + uint32_t New = F.Flags & (EF_MIPS_ARCH | EF_MIPS_MACH); + + // Check ISA compatibility. 
+ if (isArchMatched(New, Ret)) + continue; + if (!isArchMatched(Ret, New)) { + error("target ISA '" + getArchName(Ret) + "' is incompatible with '" + + getArchName(New) + "': " + F.Filename); + return 0; + } + Ret = New; + } + return Ret; +} + +template <class ELFT> uint32_t elf::getMipsEFlags() { + std::vector<FileFlags> V; + for (elf::ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) + V.push_back({F->getName(), F->getObj().getHeader()->e_flags}); + if (V.empty()) + return 0; + checkFlags(V); + return getMiscFlags(V) | getPicFlags(V) | getArchFlags(V); +} + +static int compareMipsFpAbi(uint8_t FpA, uint8_t FpB) { + if (FpA == FpB) + return 0; + if (FpB == Mips::Val_GNU_MIPS_ABI_FP_ANY) + return 1; + if (FpB == Mips::Val_GNU_MIPS_ABI_FP_64A && + FpA == Mips::Val_GNU_MIPS_ABI_FP_64) + return 1; + if (FpB != Mips::Val_GNU_MIPS_ABI_FP_XX) + return -1; + if (FpA == Mips::Val_GNU_MIPS_ABI_FP_DOUBLE || + FpA == Mips::Val_GNU_MIPS_ABI_FP_64 || + FpA == Mips::Val_GNU_MIPS_ABI_FP_64A) + return 1; + return -1; +} + +static StringRef getMipsFpAbiName(uint8_t FpAbi) { + switch (FpAbi) { + case Mips::Val_GNU_MIPS_ABI_FP_ANY: + return "any"; + case Mips::Val_GNU_MIPS_ABI_FP_DOUBLE: + return "-mdouble-float"; + case Mips::Val_GNU_MIPS_ABI_FP_SINGLE: + return "-msingle-float"; + case Mips::Val_GNU_MIPS_ABI_FP_SOFT: + return "-msoft-float"; + case Mips::Val_GNU_MIPS_ABI_FP_OLD_64: + return "-mips32r2 -mfp64 (old)"; + case Mips::Val_GNU_MIPS_ABI_FP_XX: + return "-mfpxx"; + case Mips::Val_GNU_MIPS_ABI_FP_64: + return "-mgp32 -mfp64"; + case Mips::Val_GNU_MIPS_ABI_FP_64A: + return "-mgp32 -mfp64 -mno-odd-spreg"; + default: + return "unknown"; + } +} + +uint8_t elf::getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag, + StringRef FileName) { + if (compareMipsFpAbi(NewFlag, OldFlag) >= 0) + return NewFlag; + if (compareMipsFpAbi(OldFlag, NewFlag) < 0) + error("target floating point ABI '" + getMipsFpAbiName(OldFlag) + + "' is incompatible with '" + getMipsFpAbiName(NewFlag) + "': 
" + + FileName); + return OldFlag; +} + +template <class ELFT> static bool isN32Abi(const InputFile *F) { + if (auto *EF = dyn_cast<ELFFileBase<ELFT>>(F)) + return EF->getObj().getHeader()->e_flags & EF_MIPS_ABI2; + return false; +} + +bool elf::isMipsN32Abi(const InputFile *F) { + switch (Config->EKind) { + case ELF32LEKind: + return isN32Abi<ELF32LE>(F); + case ELF32BEKind: + return isN32Abi<ELF32BE>(F); + case ELF64LEKind: + return isN32Abi<ELF64LE>(F); + case ELF64BEKind: + return isN32Abi<ELF64BE>(F); + default: + llvm_unreachable("unknown Config->EKind"); + } +} + +template uint32_t elf::getMipsEFlags<ELF32LE>(); +template uint32_t elf::getMipsEFlags<ELF32BE>(); +template uint32_t elf::getMipsEFlags<ELF64LE>(); +template uint32_t elf::getMipsEFlags<ELF64BE>(); diff --git a/contrib/llvm/tools/lld/ELF/Options.td b/contrib/llvm/tools/lld/ELF/Options.td new file mode 100644 index 000000000000..d436f056d013 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Options.td @@ -0,0 +1,367 @@ +include "llvm/Option/OptParser.td" + +// For options whose names are multiple letters, either one dash or +// two can precede the option name except those that start with 'o'. 
+class F<string name>: Flag<["--", "-"], name>; +class J<string name>: Joined<["--", "-"], name>; +class S<string name>: Separate<["--", "-"], name>; +class JS<string name>: JoinedOrSeparate<["--", "-"], name>; + +def auxiliary: S<"auxiliary">, HelpText<"Set DT_AUXILIARY field to the specified name">; + +def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind defined symbols locally">; + +def Bsymbolic_functions: F<"Bsymbolic-functions">, + HelpText<"Bind defined function symbols locally">; + +def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries">; + +def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">; + +def build_id: F<"build-id">, HelpText<"Generate build ID note">; + +def build_id_eq: J<"build-id=">, HelpText<"Generate build ID note">; + +def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">, + HelpText<"Add a directory to the library search path">; + +def O: Joined<["-"], "O">, HelpText<"Optimize output file size">; + +def Tbss: S<"Tbss">, HelpText<"Same as --section-start with .bss as the sectionname">; + +def Tdata: S<"Tdata">, HelpText<"Same as --section-start with .data as the sectionname">; + +def Ttext: S<"Ttext">, HelpText<"Same as --section-start with .text as the sectionname">; + +def allow_multiple_definition: F<"allow-multiple-definition">, + HelpText<"Allow multiple definitions">; + +def as_needed: F<"as-needed">, + HelpText<"Only set DT_NEEDED for shared libraries if used">; + +def color_diagnostics: F<"color-diagnostics">, + HelpText<"Use colors in diagnostics">; + +def color_diagnostics_eq: J<"color-diagnostics=">, + HelpText<"Use colors in diagnostics">; + +def disable_new_dtags: F<"disable-new-dtags">, + HelpText<"Disable new dynamic tags">; + +def discard_all: F<"discard-all">, HelpText<"Delete all local symbols">; + +def discard_locals: F<"discard-locals">, + HelpText<"Delete temporary local symbols">; + +def discard_none: F<"discard-none">, + HelpText<"Keep all symbols in the symbol table">; + +def 
dynamic_linker: S<"dynamic-linker">, + HelpText<"Which dynamic linker to use">; + +def dynamic_list: S<"dynamic-list">, + HelpText<"Read a list of dynamic symbols">; + +def eh_frame_hdr: F<"eh-frame-hdr">, + HelpText<"Request creation of .eh_frame_hdr section and PT_GNU_EH_FRAME segment header">; + +def enable_new_dtags: F<"enable-new-dtags">, + HelpText<"Enable new dynamic tags">; + +def end_lib: F<"end-lib">, + HelpText<"End a grouping of objects that should be treated as if they were together in an archive">; + +def entry: S<"entry">, MetaVarName<"<entry>">, + HelpText<"Name of entry point symbol">; + +def error_limit: S<"error-limit">, + HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; + +def export_dynamic: F<"export-dynamic">, + HelpText<"Put symbols in the dynamic symbol table">; + +def export_dynamic_symbol: S<"export-dynamic-symbol">, + HelpText<"Put a symbol in the dynamic symbol table">; + +def fatal_warnings: F<"fatal-warnings">, + HelpText<"Treat warnings as errors">; + +def fini: S<"fini">, MetaVarName<"<symbol>">, + HelpText<"Specify a finalizer function">; + +def full_shutdown : F<"full-shutdown">, + HelpText<"Perform a full shutdown instead of calling _exit">; + +def format: J<"format=">, MetaVarName<"<input-format>">, + HelpText<"Change the input format of the inputs following this option">; + +def gc_sections: F<"gc-sections">, + HelpText<"Enable garbage collection of unused sections">; + +def gdb_index: F<"gdb-index">, + HelpText<"Generate .gdb_index section">; + +def hash_style: S<"hash-style">, + HelpText<"Specify hash style (sysv, gnu or both)">; + +def help: F<"help">, HelpText<"Print option help">; + +def icf: F<"icf=all">, HelpText<"Enable identical code folding">; + +def image_base : J<"image-base=">, HelpText<"Set the base address">; + +def init: S<"init">, MetaVarName<"<symbol>">, + HelpText<"Specify an initializer function">; + +def l: JoinedOrSeparate<["-"], "l">, MetaVarName<"<libName>">, + HelpText<"Root 
name of library to use">; + +def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">, + HelpText<"Optimization level for LTO">; + +def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">; + +def nostdlib: F<"nostdlib">, + HelpText<"Only search directories specified on the command line">; + +def no_as_needed: F<"no-as-needed">, + HelpText<"Always DT_NEEDED for shared libraries">; + +def no_color_diagnostics: F<"no-color-diagnostics">, + HelpText<"Do not use colors in diagnostics">; + +def no_demangle: F<"no-demangle">, + HelpText<"Do not demangle symbol names">; + +def no_gc_sections: F<"no-gc-sections">, + HelpText<"Disable garbage collection of unused sections">; + +def no_gnu_unique: F<"no-gnu-unique">, + HelpText<"Disable STB_GNU_UNIQUE symbol binding">; + +def no_threads: F<"no-threads">, + HelpText<"Do not run the linker multi-threaded">; + +def no_whole_archive: F<"no-whole-archive">, + HelpText<"Restores the default behavior of loading archive members">; + +def noinhibit_exec: F<"noinhibit-exec">, + HelpText<"Retain the executable output file whenever it is still usable">; + +def nopie: F<"nopie">, HelpText<"Do not create a position independent executable">; + +def no_rosegment: F<"no-rosegment">, HelpText<"Do not put read-only non-executable sections in their own segment">; + +def no_undefined: F<"no-undefined">, + HelpText<"Report unresolved symbols even if the linker is creating a shared library">; + +def no_undefined_version: F<"no-undefined-version">, + HelpText<"Report version scripts that refer undefined symbols">; + +def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">, + HelpText<"Path to file to write output">; + +def oformat: Separate<["--"], "oformat">, MetaVarName<"<format>">, + HelpText<"Specify the binary format for the output object file">; + +def omagic: F<"omagic">, MetaVarName<"<magic>">, + HelpText<"Set the text and data sections to be readable and writable">; + +def pie: F<"pie">, HelpText<"Create a position independent 
executable">; + +def print_gc_sections: F<"print-gc-sections">, + HelpText<"List removed unused sections">; + +def reproduce: S<"reproduce">, + HelpText<"Dump linker invocation and input files for debugging">; + +def rpath: S<"rpath">, HelpText<"Add a DT_RUNPATH to the output">; + +def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; + +def retain_symbols_file: J<"retain-symbols-file=">, MetaVarName<"<file>">, + HelpText<"Retain only the symbols listed in the file">; + +def script: S<"script">, HelpText<"Read linker script">; + +def section_start: S<"section-start">, MetaVarName<"<address>">, + HelpText<"Set address of section">; + +def shared: F<"shared">, HelpText<"Build a shared object">; + +def soname: J<"soname=">, HelpText<"Set DT_SONAME">; + +def sort_section: S<"sort-section">, HelpText<"Specifies sections sorting rule when linkerscript is used">; + +def start_lib: F<"start-lib">, + HelpText<"Start a grouping of objects that should be treated as if they were together in an archive">; + +def strip_all: F<"strip-all">, HelpText<"Strip all symbols">; + +def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; + +def symbol_ordering_file: S<"symbol-ordering-file">, + HelpText<"Layout sections in the order specified by symbol file">; + +def sysroot: J<"sysroot=">, HelpText<"Set the system root">; + +def target1_rel: F<"target1-rel">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_REL32">; + +def target1_abs: F<"target1-abs">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_ABS32">; + +def target2: J<"target2=">, MetaVarName<"<type>">, HelpText<"Interpret R_ARM_TARGET2 as <type>, where <type> is one of rel, abs, or got-rel">; + +def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; + +def trace: F<"trace">, HelpText<"Print the names of the input files">; + +def trace_symbol : J<"trace-symbol=">, HelpText<"Trace references to symbols">; + +def undefined: S<"undefined">, + HelpText<"Force undefined symbol during 
linking">; + +def unresolved_symbols: J<"unresolved-symbols=">, + HelpText<"Determine how to handle unresolved symbols">; + +def rsp_quoting: J<"rsp-quoting=">, + HelpText<"Quoting style for response files. Values supported: windows|posix">; + +def v: Flag<["-"], "v">, HelpText<"Display the version number">; + +def verbose: F<"verbose">, HelpText<"Verbose mode">; + +def version: F<"version">, HelpText<"Display the version number and exit">; + +def version_script: S<"version-script">, + HelpText<"Read a version script">; + +def warn_common: F<"warn-common">, + HelpText<"Warn about duplicate common symbols">; + +def whole_archive: F<"whole-archive">, + HelpText<"Force load of all members in a static library">; + +def wrap: S<"wrap">, MetaVarName<"<symbol>">, + HelpText<"Use wrapper functions for symbol">; + +def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">, + HelpText<"Linker option extensions">; + +// Aliases +def alias_auxiliary: Separate<["-"], "f">, Alias<auxiliary>; +def alias_Bdynamic_call_shared: F<"call_shared">, Alias<Bdynamic>; +def alias_Bdynamic_dy: F<"dy">, Alias<Bdynamic>; +def alias_Bstatic_dn: F<"dn">, Alias<Bstatic>; +def alias_Bstatic_non_shared: F<"non_shared">, Alias<Bstatic>; +def alias_Bstatic_static: F<"static">, Alias<Bstatic>; +def alias_L__library_path: J<"library-path=">, Alias<L>; +def alias_discard_all_x: Flag<["-"], "x">, Alias<discard_all>; +def alias_discard_locals_X: Flag<["-"], "X">, Alias<discard_locals>; +def alias_dynamic_list: J<"dynamic-list=">, Alias<dynamic_list>; +def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>; +def alias_entry_entry: J<"entry=">, Alias<entry>; +def alias_error_limit: J<"error-limit=">, Alias<error_limit>; +def alias_export_dynamic_E: Flag<["-"], "E">, Alias<export_dynamic>; +def alias_export_dynamic_symbol: J<"export-dynamic-symbol=">, + Alias<export_dynamic_symbol>; +def alias_fini_fini: J<"fini=">, Alias<fini>; +def alias_format_b: S<"b">, Alias<format>; +def 
alias_hash_style_hash_style: J<"hash-style=">, Alias<hash_style>; +def alias_init_init: J<"init=">, Alias<init>; +def alias_l__library: J<"library=">, Alias<l>; +def alias_omagic: Flag<["-"], "N">, Alias<omagic>; +def alias_o_output: Joined<["--"], "output=">, Alias<o>; +def alias_o_output2 : Separate<["--"], "output">, Alias<o>; +def alias_pie_pic_executable: F<"pic-executable">, Alias<pie>; +def alias_relocatable_r: Flag<["-"], "r">, Alias<relocatable>; +def alias_retain_symbols_file: S<"retain-symbols-file">, Alias<retain_symbols_file>; +def alias_rpath_R: JoinedOrSeparate<["-"], "R">, Alias<rpath>; +def alias_rpath_rpath: J<"rpath=">, Alias<rpath>; +def alias_script_T: JoinedOrSeparate<["-"], "T">, Alias<script>; +def alias_shared_Bshareable: F<"Bshareable">, Alias<shared>; +def alias_soname_h: JoinedOrSeparate<["-"], "h">, Alias<soname>; +def alias_soname_soname: S<"soname">, Alias<soname>; +def alias_sort_section: J<"sort-section=">, Alias<sort_section>; +def alias_script: J<"script=">, Alias<script>; +def alias_strip_all: Flag<["-"], "s">, Alias<strip_all>; +def alias_strip_debug_S: Flag<["-"], "S">, Alias<strip_debug>; +def alias_Tbss: J<"Tbss=">, Alias<Tbss>; +def alias_Tdata: J<"Tdata=">, Alias<Tdata>; +def alias_trace: Flag<["-"], "t">, Alias<trace>; +def alias_trace_symbol_y : JoinedOrSeparate<["-"], "y">, Alias<trace_symbol>; +def alias_Ttext: J<"Ttext=">, Alias<Ttext>; +def alias_Ttext_segment: S<"Ttext-segment">, Alias<Ttext>; +def alias_Ttext_segment_eq: J<"Ttext-segment=">, Alias<Ttext>; +def alias_undefined_eq: J<"undefined=">, Alias<undefined>; +def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias<undefined>; +def alias_version_V: Flag<["-"], "V">, Alias<version>; +def alias_wrap_wrap: J<"wrap=">, Alias<wrap>; + +// Our symbol resolution algorithm handles symbols in archive files differently +// than traditional linkers, so we don't need --start-group and --end-group. +// These options are recognized for compatibility but ignored.
+def end_group: F<"end-group">; +def end_group_paren: Flag<["-"], ")">; +def start_group: F<"start-group">; +def start_group_paren: Flag<["-"], "(">; + +// Ignore LTO plugin-related options. +// clang -flto passes -plugin and -plugin-opt to the linker. This is required +// for ld.gold and ld.bfd to get LTO working. But it's not for lld which doesn't +// rely on a plugin. Instead of detecting which linker is used on clang side we +// just ignore the option on lld side as it's easier. In fact, the linker could +// be called 'ld' and understanding which linker is used would require parsing of +// --version output. +def plugin: S<"plugin">; +def plugin_eq: J<"plugin=">; +def plugin_opt: S<"plugin-opt">; +def plugin_opt_eq: J<"plugin-opt=">; + +// Options listed below are silently ignored for now for compatibility. +def allow_shlib_undefined: F<"allow-shlib-undefined">; +def cref: Flag<["--"], "cref">; +def define_common: F<"define-common">; +def demangle: F<"demangle">; +def detect_odr_violations: F<"detect-odr-violations">; +def g: Flag<["-"], "g">; +def M: Flag<["-"], "M">; +def Map: JS<"Map">; +def no_add_needed: F<"no-add-needed">; +def no_allow_shlib_undefined: F<"no-allow-shlib-undefined">; +def no_copy_dt_needed_entries: F<"no-copy-dt-needed-entries">, + Alias<no_add_needed>; +def no_dynamic_linker: F<"no-dynamic-linker">; +def no_fatal_warnings: F<"no-fatal-warnings">; +def no_mmap_output_file: F<"no-mmap-output-file">; +def no_warn_common: F<"no-warn-common">; +def no_warn_mismatch: F<"no-warn-mismatch">; +def rpath_link: S<"rpath-link">; +def rpath_link_eq: J<"rpath-link=">; +def sort_common: F<"sort-common">; +def stats: F<"stats">; +def warn_execstack: F<"warn-execstack">; +def warn_shared_textrel: F<"warn-shared-textrel">; +def EB : F<"EB">; +def EL : F<"EL">; +def G: JoinedOrSeparate<["-"], "G">; +def Qy : F<"Qy">; + +// Aliases for ignored options +def alias_define_common_d: Flag<["-"], "d">, Alias<define_common>; +def alias_define_common_dc: F<"dc">, 
Alias<define_common>; +def alias_define_common_dp: F<"dp">, Alias<define_common>; +def alias_Map_eq: J<"Map=">, Alias<Map>; +def alias_version_script_version_script: J<"version-script=">, + Alias<version_script>; + +// LTO-related options. +def lto_aa_pipeline: J<"lto-aa-pipeline=">, + HelpText<"AA pipeline to run during LTO. Used in conjunction with -lto-newpm-passes">; +def lto_newpm_passes: J<"lto-newpm-passes=">, + HelpText<"Passes to run during LTO">; +def lto_partitions: J<"lto-partitions=">, + HelpText<"Number of LTO codegen partitions">; +def disable_verify: F<"disable-verify">; +def mllvm: S<"mllvm">; +def save_temps: F<"save-temps">; +def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">; diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.cpp b/contrib/llvm/tools/lld/ELF/OutputSections.cpp new file mode 100644 index 000000000000..a9d951dcc745 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/OutputSections.cpp @@ -0,0 +1,707 @@ +//===- OutputSections.cpp -------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "OutputSections.h" +#include "Config.h" +#include "EhFrame.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "Threads.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/SHA1.h" + +using namespace llvm; +using namespace llvm::dwarf; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elf; + +OutputSectionBase::OutputSectionBase(StringRef Name, uint32_t Type, + uint64_t Flags) + : Name(Name) { + this->Type = Type; + this->Flags = Flags; + this->Addralign = 1; +} + +uint32_t OutputSectionBase::getPhdrFlags() const { + uint32_t Ret = PF_R; + if (Flags & SHF_WRITE) + Ret |= PF_W; + if (Flags & SHF_EXECINSTR) + Ret |= PF_X; + return Ret; +} + +template <class ELFT> +void OutputSectionBase::writeHeaderTo(typename ELFT::Shdr *Shdr) { + Shdr->sh_entsize = Entsize; + Shdr->sh_addralign = Addralign; + Shdr->sh_type = Type; + Shdr->sh_offset = Offset; + Shdr->sh_flags = Flags; + Shdr->sh_info = Info; + Shdr->sh_link = Link; + Shdr->sh_addr = Addr; + Shdr->sh_size = Size; + Shdr->sh_name = ShName; +} + +template <class ELFT> static uint64_t getEntsize(uint32_t Type) { + switch (Type) { + case SHT_RELA: + return sizeof(typename ELFT::Rela); + case SHT_REL: + return sizeof(typename ELFT::Rel); + case SHT_MIPS_REGINFO: + return sizeof(Elf_Mips_RegInfo<ELFT>); + case SHT_MIPS_OPTIONS: + return sizeof(Elf_Mips_Options<ELFT>) + sizeof(Elf_Mips_RegInfo<ELFT>); + case SHT_MIPS_ABIFLAGS: + return sizeof(Elf_Mips_ABIFlags<ELFT>); + default: + return 0; + } +} + +template <class ELFT> +OutputSection<ELFT>::OutputSection(StringRef Name, uint32_t Type, uintX_t Flags) + : OutputSectionBase(Name, Type, Flags) { + 
this->Entsize = getEntsize<ELFT>(Type); +} + +template <typename ELFT> +static bool compareByFilePosition(InputSection<ELFT> *A, + InputSection<ELFT> *B) { + // Synthetic doesn't have link order dependecy, stable_sort will keep it last + if (A->kind() == InputSectionData::Synthetic || + B->kind() == InputSectionData::Synthetic) + return false; + auto *LA = cast<InputSection<ELFT>>(A->getLinkOrderDep()); + auto *LB = cast<InputSection<ELFT>>(B->getLinkOrderDep()); + OutputSectionBase *AOut = LA->OutSec; + OutputSectionBase *BOut = LB->OutSec; + if (AOut != BOut) + return AOut->SectionIndex < BOut->SectionIndex; + return LA->OutSecOff < LB->OutSecOff; +} + +template <class ELFT> void OutputSection<ELFT>::finalize() { + if ((this->Flags & SHF_LINK_ORDER) && !this->Sections.empty()) { + std::sort(Sections.begin(), Sections.end(), compareByFilePosition<ELFT>); + Size = 0; + assignOffsets(); + + // We must preserve the link order dependency of sections with the + // SHF_LINK_ORDER flag. The dependency is indicated by the sh_link field. We + // need to translate the InputSection sh_link to the OutputSection sh_link, + // all InputSections in the OutputSection have the same dependency. + if (auto *D = this->Sections.front()->getLinkOrderDep()) + this->Link = D->OutSec->SectionIndex; + } + + uint32_t Type = this->Type; + if (!Config->Relocatable || (Type != SHT_RELA && Type != SHT_REL)) + return; + + this->Link = In<ELFT>::SymTab->OutSec->SectionIndex; + // sh_info for SHT_REL[A] sections should contain the section header index of + // the section to which the relocation applies. 
+ InputSectionBase<ELFT> *S = Sections[0]->getRelocatedSection(); + this->Info = S->OutSec->SectionIndex; +} + +template <class ELFT> +void OutputSection<ELFT>::addSection(InputSectionData *C) { + assert(C->Live); + auto *S = cast<InputSection<ELFT>>(C); + Sections.push_back(S); + S->OutSec = this; + this->updateAlignment(S->Alignment); + // Keep sh_entsize value of the input section to be able to perform merging + // later during a final linking using the generated relocatable object. + if (Config->Relocatable && (S->Flags & SHF_MERGE)) + this->Entsize = S->Entsize; +} + +// This function is called after we sort input sections +// and scan relocations to setup sections' offsets. +template <class ELFT> void OutputSection<ELFT>::assignOffsets() { + uintX_t Off = this->Size; + for (InputSection<ELFT> *S : Sections) { + Off = alignTo(Off, S->Alignment); + S->OutSecOff = Off; + Off += S->getSize(); + } + this->Size = Off; +} + +template <class ELFT> +void OutputSection<ELFT>::sort( + std::function<int(InputSection<ELFT> *S)> Order) { + typedef std::pair<unsigned, InputSection<ELFT> *> Pair; + auto Comp = [](const Pair &A, const Pair &B) { return A.first < B.first; }; + + std::vector<Pair> V; + for (InputSection<ELFT> *S : Sections) + V.push_back({Order(S), S}); + std::stable_sort(V.begin(), V.end(), Comp); + Sections.clear(); + for (Pair &P : V) + Sections.push_back(P.second); +} + +// Sorts input sections by section name suffixes, so that .foo.N comes +// before .foo.M if N < M. Used to sort .{init,fini}_array.N sections. +// We want to keep the original order if the priorities are the same +// because the compiler keeps the original initialization order in a +// translation unit and we need to respect that. +// For more detail, read the section of the GCC's manual about init_priority. +template <class ELFT> void OutputSection<ELFT>::sortInitFini() { + // Sort sections by priority. 
+ sort([](InputSection<ELFT> *S) { return getPriority(S->Name); }); +} + +// Returns true if S matches /Filename.?\.o$/. +static bool isCrtBeginEnd(StringRef S, StringRef Filename) { + if (!S.endswith(".o")) + return false; + S = S.drop_back(2); + if (S.endswith(Filename)) + return true; + return !S.empty() && S.drop_back().endswith(Filename); +} + +static bool isCrtbegin(StringRef S) { return isCrtBeginEnd(S, "crtbegin"); } +static bool isCrtend(StringRef S) { return isCrtBeginEnd(S, "crtend"); } + +// .ctors and .dtors are sorted by this priority from highest to lowest. +// +// 1. The section was contained in crtbegin (crtbegin contains +// some sentinel value in its .ctors and .dtors so that the runtime +// can find the beginning of the sections.) +// +// 2. The section has an optional priority value in the form of ".ctors.N" +// or ".dtors.N" where N is a number. Unlike .{init,fini}_array, +// they are compared as string rather than number. +// +// 3. The section is just ".ctors" or ".dtors". +// +// 4. The section was contained in crtend, which contains an end marker. +// +// In an ideal world, we don't need this function because .init_array and +// .ctors are duplicate features (and .init_array is newer.) However, there +// are too many real-world use cases of .ctors, so we had no choice to +// support that with this rather ad-hoc semantics. 
+template <class ELFT> +static bool compCtors(const InputSection<ELFT> *A, + const InputSection<ELFT> *B) { + bool BeginA = isCrtbegin(A->getFile()->getName()); + bool BeginB = isCrtbegin(B->getFile()->getName()); + if (BeginA != BeginB) + return BeginA; + bool EndA = isCrtend(A->getFile()->getName()); + bool EndB = isCrtend(B->getFile()->getName()); + if (EndA != EndB) + return EndB; + StringRef X = A->Name; + StringRef Y = B->Name; + assert(X.startswith(".ctors") || X.startswith(".dtors")); + assert(Y.startswith(".ctors") || Y.startswith(".dtors")); + X = X.substr(6); + Y = Y.substr(6); + if (X.empty() && Y.empty()) + return false; + return X < Y; +} + +// Sorts input sections by the special rules for .ctors and .dtors. +// Unfortunately, the rules are different from the one for .{init,fini}_array. +// Read the comment above. +template <class ELFT> void OutputSection<ELFT>::sortCtorsDtors() { + std::stable_sort(Sections.begin(), Sections.end(), compCtors<ELFT>); +} + +// Fill [Buf, Buf + Size) with Filler. Filler is written in big +// endian order. This is used for linker script "=fillexp" command. +void fill(uint8_t *Buf, size_t Size, uint32_t Filler) { + uint8_t V[4]; + write32be(V, Filler); + size_t I = 0; + for (; I + 4 < Size; I += 4) + memcpy(Buf + I, V, 4); + memcpy(Buf + I, V, Size - I); +} + +template <class ELFT> void OutputSection<ELFT>::writeTo(uint8_t *Buf) { + Loc = Buf; + if (uint32_t Filler = Script<ELFT>::X->getFiller(this->Name)) + fill(Buf, this->Size, Filler); + + auto Fn = [=](InputSection<ELFT> *IS) { IS->writeTo(Buf); }; + forEach(Sections.begin(), Sections.end(), Fn); + + // Linker scripts may have BYTE()-family commands with which you + // can write arbitrary bytes to the output. Process them if any. 
+ Script<ELFT>::X->writeDataBytes(this->Name, Buf); +} + +template <class ELFT> +EhOutputSection<ELFT>::EhOutputSection() + : OutputSectionBase(".eh_frame", SHT_PROGBITS, SHF_ALLOC) {} + +// Search for an existing CIE record or create a new one. +// CIE records from input object files are uniquified by their contents +// and where their relocations point to. +template <class ELFT> +template <class RelTy> +CieRecord *EhOutputSection<ELFT>::addCie(EhSectionPiece &Piece, + ArrayRef<RelTy> Rels) { + auto *Sec = cast<EhInputSection<ELFT>>(Piece.ID); + const endianness E = ELFT::TargetEndianness; + if (read32<E>(Piece.data().data() + 4) != 0) + fatal(toString(Sec) + ": CIE expected at beginning of .eh_frame"); + + SymbolBody *Personality = nullptr; + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI != (unsigned)-1) + Personality = &Sec->getFile()->getRelocTargetSym(Rels[FirstRelI]); + + // Search for an existing CIE by CIE contents/relocation target pair. + CieRecord *Cie = &CieMap[{Piece.data(), Personality}]; + + // If not found, create a new one. + if (Cie->Piece == nullptr) { + Cie->Piece = &Piece; + Cies.push_back(Cie); + } + return Cie; +} + +// There is one FDE per function. Returns true if a given FDE +// points to a live function. +template <class ELFT> +template <class RelTy> +bool EhOutputSection<ELFT>::isFdeLive(EhSectionPiece &Piece, + ArrayRef<RelTy> Rels) { + auto *Sec = cast<EhInputSection<ELFT>>(Piece.ID); + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI == (unsigned)-1) + fatal(toString(Sec) + ": FDE doesn't reference another section"); + const RelTy &Rel = Rels[FirstRelI]; + SymbolBody &B = Sec->getFile()->getRelocTargetSym(Rel); + auto *D = dyn_cast<DefinedRegular<ELFT>>(&B); + if (!D || !D->Section) + return false; + InputSectionBase<ELFT> *Target = D->Section->Repl; + return Target && Target->Live; +} + +// .eh_frame is a sequence of CIE or FDE records. 
In general, there +// is one CIE record per input object file which is followed by +// a list of FDEs. This function searches an existing CIE or create a new +// one and associates FDEs to the CIE. +template <class ELFT> +template <class RelTy> +void EhOutputSection<ELFT>::addSectionAux(EhInputSection<ELFT> *Sec, + ArrayRef<RelTy> Rels) { + const endianness E = ELFT::TargetEndianness; + + DenseMap<size_t, CieRecord *> OffsetToCie; + for (EhSectionPiece &Piece : Sec->Pieces) { + // The empty record is the end marker. + if (Piece.size() == 4) + return; + + size_t Offset = Piece.InputOff; + uint32_t ID = read32<E>(Piece.data().data() + 4); + if (ID == 0) { + OffsetToCie[Offset] = addCie(Piece, Rels); + continue; + } + + uint32_t CieOffset = Offset + 4 - ID; + CieRecord *Cie = OffsetToCie[CieOffset]; + if (!Cie) + fatal(toString(Sec) + ": invalid CIE reference"); + + if (!isFdeLive(Piece, Rels)) + continue; + Cie->FdePieces.push_back(&Piece); + NumFdes++; + } +} + +template <class ELFT> +void EhOutputSection<ELFT>::addSection(InputSectionData *C) { + auto *Sec = cast<EhInputSection<ELFT>>(C); + Sec->OutSec = this; + this->updateAlignment(Sec->Alignment); + Sections.push_back(Sec); + + // .eh_frame is a sequence of CIE or FDE records. This function + // splits it into pieces so that we can call + // SplitInputSection::getSectionPiece on the section. + Sec->split(); + if (Sec->Pieces.empty()) + return; + + if (Sec->NumRelocations) { + if (Sec->AreRelocsRela) + addSectionAux(Sec, Sec->relas()); + else + addSectionAux(Sec, Sec->rels()); + return; + } + addSectionAux(Sec, makeArrayRef<Elf_Rela>(nullptr, nullptr)); +} + +template <class ELFT> +static void writeCieFde(uint8_t *Buf, ArrayRef<uint8_t> D) { + memcpy(Buf, D.data(), D.size()); + + // Fix the size field. -4 since size does not include the size field itself. 
+ const endianness E = ELFT::TargetEndianness; + write32<E>(Buf, alignTo(D.size(), sizeof(typename ELFT::uint)) - 4); +} + +template <class ELFT> void EhOutputSection<ELFT>::finalize() { + if (this->Size) + return; // Already finalized. + + size_t Off = 0; + for (CieRecord *Cie : Cies) { + Cie->Piece->OutputOff = Off; + Off += alignTo(Cie->Piece->size(), sizeof(uintX_t)); + + for (EhSectionPiece *Fde : Cie->FdePieces) { + Fde->OutputOff = Off; + Off += alignTo(Fde->size(), sizeof(uintX_t)); + } + } + this->Size = Off; +} + +template <class ELFT> static uint64_t readFdeAddr(uint8_t *Buf, int Size) { + const endianness E = ELFT::TargetEndianness; + switch (Size) { + case DW_EH_PE_udata2: + return read16<E>(Buf); + case DW_EH_PE_udata4: + return read32<E>(Buf); + case DW_EH_PE_udata8: + return read64<E>(Buf); + case DW_EH_PE_absptr: + if (ELFT::Is64Bits) + return read64<E>(Buf); + return read32<E>(Buf); + } + fatal("unknown FDE size encoding"); +} + +// Returns the VA to which a given FDE (on a mmap'ed buffer) is applied to. +// We need it to create .eh_frame_hdr section. +template <class ELFT> +typename ELFT::uint EhOutputSection<ELFT>::getFdePc(uint8_t *Buf, size_t FdeOff, + uint8_t Enc) { + // The starting address to which this FDE applies is + // stored at FDE + 8 byte. 
+ size_t Off = FdeOff + 8; + uint64_t Addr = readFdeAddr<ELFT>(Buf + Off, Enc & 0x7); + if ((Enc & 0x70) == DW_EH_PE_absptr) + return Addr; + if ((Enc & 0x70) == DW_EH_PE_pcrel) + return Addr + this->Addr + Off; + fatal("unknown FDE size relative encoding"); +} + +template <class ELFT> void EhOutputSection<ELFT>::writeTo(uint8_t *Buf) { + const endianness E = ELFT::TargetEndianness; + for (CieRecord *Cie : Cies) { + size_t CieOffset = Cie->Piece->OutputOff; + writeCieFde<ELFT>(Buf + CieOffset, Cie->Piece->data()); + + for (EhSectionPiece *Fde : Cie->FdePieces) { + size_t Off = Fde->OutputOff; + writeCieFde<ELFT>(Buf + Off, Fde->data()); + + // FDE's second word should have the offset to an associated CIE. + // Write it. + write32<E>(Buf + Off + 4, Off + 4 - CieOffset); + } + } + + for (EhInputSection<ELFT> *S : Sections) + S->relocate(Buf, nullptr); + + // Construct .eh_frame_hdr. .eh_frame_hdr is a binary search table + // to get a FDE from an address to which FDE is applied. So here + // we obtain two addresses and pass them to EhFrameHdr object. 
+ if (In<ELFT>::EhFrameHdr) { + for (CieRecord *Cie : Cies) { + uint8_t Enc = getFdeEncoding<ELFT>(Cie->Piece); + for (SectionPiece *Fde : Cie->FdePieces) { + uintX_t Pc = getFdePc(Buf, Fde->OutputOff, Enc); + uintX_t FdeVA = this->Addr + Fde->OutputOff; + In<ELFT>::EhFrameHdr->addFde(Pc, FdeVA); + } + } + } +} + +template <class ELFT> +MergeOutputSection<ELFT>::MergeOutputSection(StringRef Name, uint32_t Type, + uintX_t Flags, uintX_t Alignment) + : OutputSectionBase(Name, Type, Flags), + Builder(StringTableBuilder::RAW, Alignment) {} + +template <class ELFT> void MergeOutputSection<ELFT>::writeTo(uint8_t *Buf) { + Builder.write(Buf); +} + +template <class ELFT> +void MergeOutputSection<ELFT>::addSection(InputSectionData *C) { + auto *Sec = cast<MergeInputSection<ELFT>>(C); + Sec->OutSec = this; + this->updateAlignment(Sec->Alignment); + this->Entsize = Sec->Entsize; + Sections.push_back(Sec); +} + +template <class ELFT> bool MergeOutputSection<ELFT>::shouldTailMerge() const { + return (this->Flags & SHF_STRINGS) && Config->Optimize >= 2; +} + +template <class ELFT> void MergeOutputSection<ELFT>::finalizeTailMerge() { + // Add all string pieces to the string table builder to create section + // contents. + for (MergeInputSection<ELFT> *Sec : Sections) + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Builder.add(Sec->getData(I)); + + // Fix the string table content. After this, the contents will never change. + Builder.finalize(); + this->Size = Builder.getSize(); + + // finalize() fixed tail-optimized strings, so we can now get + // offsets of strings. Get an offset for each string and save it + // to a corresponding StringPiece for easy access. 
+ for (MergeInputSection<ELFT> *Sec : Sections) + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); +} + +template <class ELFT> void MergeOutputSection<ELFT>::finalizeNoTailMerge() { + // Add all string pieces to the string table builder to create section + // contents. Because we are not tail-optimizing, offsets of strings are + // fixed when they are added to the builder (string table builder contains + // a hash table from strings to offsets). + for (MergeInputSection<ELFT> *Sec : Sections) + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Sec->Pieces[I].OutputOff = Builder.add(Sec->getData(I)); + + Builder.finalizeInOrder(); + this->Size = Builder.getSize(); +} + +template <class ELFT> void MergeOutputSection<ELFT>::finalize() { + if (shouldTailMerge()) + finalizeTailMerge(); + else + finalizeNoTailMerge(); +} + +template <class ELFT> +static typename ELFT::uint getOutFlags(InputSectionBase<ELFT> *S) { + return S->Flags & ~SHF_GROUP & ~SHF_COMPRESSED; +} + +namespace llvm { +template <> struct DenseMapInfo<lld::elf::SectionKey> { + static lld::elf::SectionKey getEmptyKey(); + static lld::elf::SectionKey getTombstoneKey(); + static unsigned getHashValue(const lld::elf::SectionKey &Val); + static bool isEqual(const lld::elf::SectionKey &LHS, + const lld::elf::SectionKey &RHS); +}; +} + +template <class ELFT> +static SectionKey createKey(InputSectionBase<ELFT> *C, StringRef OutsecName) { + // The ELF spec just says + // ---------------------------------------------------------------- + // In the first phase, input sections that match in name, type and + // attribute flags should be concatenated into single sections. + // ---------------------------------------------------------------- + // + // However, it is clear that at least some flags have to be ignored for + // section merging. 
At the very least SHF_GROUP and SHF_COMPRESSED have to be + // ignored. We should not have two output .text sections just because one was + // in a group and another was not for example. + // + // It also seems that that wording was a late addition and didn't get the + // necessary scrutiny. + // + // Merging sections with different flags is expected by some users. One + // reason is that if one file has + // + // int *const bar __attribute__((section(".foo"))) = (int *)0; + // + // gcc with -fPIC will produce a read only .foo section. But if another + // file has + // + // int zed; + // int *const bar __attribute__((section(".foo"))) = (int *)&zed; + // + // gcc with -fPIC will produce a read write section. + // + // Last but not least, when using linker script the merge rules are forced by + // the script. Unfortunately, linker scripts are name based. This means that + // expressions like *(.foo*) can refer to multiple input sections with + // different flags. We cannot put them in different output sections or we + // would produce wrong results for + // + // start = .; *(.foo.*) end = .; *(.bar) + // + // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to + // another. The problem is that there is no way to layout those output + // sections such that the .foo sections are the only thing between the start + // and end symbols. + // + // Given the above issues, we instead merge sections by name and error on + // incompatible types and flags. + // + // The exception being SHF_MERGE, where we create different output sections + // for each alignment. This makes each output section simple. In case of + // relocatable object generation we do not try to perform merging and treat + // SHF_MERGE sections as regular ones, but also create different output + // sections for them to allow merging at final linking stage. 
+ // + // Fortunately, creating symbols in the middle of a merge section is not + // supported by bfd or gold, so the SHF_MERGE exception should not cause + // problems with most linker scripts. + + typedef typename ELFT::uint uintX_t; + uintX_t Flags = C->Flags & (SHF_MERGE | SHF_STRINGS); + + uintX_t Alignment = 0; + if (isa<MergeInputSection<ELFT>>(C) || + (Config->Relocatable && (C->Flags & SHF_MERGE))) + Alignment = std::max<uintX_t>(C->Alignment, C->Entsize); + + return SectionKey{OutsecName, Flags, Alignment}; +} + +template <class ELFT> OutputSectionFactory<ELFT>::OutputSectionFactory() {} + +template <class ELFT> OutputSectionFactory<ELFT>::~OutputSectionFactory() {} + +template <class ELFT> +std::pair<OutputSectionBase *, bool> +OutputSectionFactory<ELFT>::create(InputSectionBase<ELFT> *C, + StringRef OutsecName) { + SectionKey Key = createKey(C, OutsecName); + return create(Key, C); +} + +static uint64_t getIncompatibleFlags(uint64_t Flags) { + return Flags & (SHF_ALLOC | SHF_TLS); +} + +template <class ELFT> +std::pair<OutputSectionBase *, bool> +OutputSectionFactory<ELFT>::create(const SectionKey &Key, + InputSectionBase<ELFT> *C) { + uintX_t Flags = getOutFlags(C); + OutputSectionBase *&Sec = Map[Key]; + if (Sec) { + if (getIncompatibleFlags(Sec->Flags) != getIncompatibleFlags(C->Flags)) + error("Section has flags incompatible with others with the same name " + + toString(C)); + if (Sec->Type != C->Type) + error("Section has different type from others with the same name " + + toString(C)); + Sec->Flags |= Flags; + return {Sec, false}; + } + + uint32_t Type = C->Type; + switch (C->kind()) { + case InputSectionBase<ELFT>::Regular: + case InputSectionBase<ELFT>::Synthetic: + Sec = make<OutputSection<ELFT>>(Key.Name, Type, Flags); + break; + case InputSectionBase<ELFT>::EHFrame: + return {Out<ELFT>::EhFrame, false}; + case InputSectionBase<ELFT>::Merge: + Sec = make<MergeOutputSection<ELFT>>(Key.Name, Type, Flags, Key.Alignment); + break; + } + return 
{Sec, true}; +} + +SectionKey DenseMapInfo<SectionKey>::getEmptyKey() { + return SectionKey{DenseMapInfo<StringRef>::getEmptyKey(), 0, 0}; +} + +SectionKey DenseMapInfo<SectionKey>::getTombstoneKey() { + return SectionKey{DenseMapInfo<StringRef>::getTombstoneKey(), 0, 0}; +} + +unsigned DenseMapInfo<SectionKey>::getHashValue(const SectionKey &Val) { + return hash_combine(Val.Name, Val.Flags, Val.Alignment); +} + +bool DenseMapInfo<SectionKey>::isEqual(const SectionKey &LHS, + const SectionKey &RHS) { + return DenseMapInfo<StringRef>::isEqual(LHS.Name, RHS.Name) && + LHS.Flags == RHS.Flags && LHS.Alignment == RHS.Alignment; +} + +namespace lld { +namespace elf { + +template void OutputSectionBase::writeHeaderTo<ELF32LE>(ELF32LE::Shdr *Shdr); +template void OutputSectionBase::writeHeaderTo<ELF32BE>(ELF32BE::Shdr *Shdr); +template void OutputSectionBase::writeHeaderTo<ELF64LE>(ELF64LE::Shdr *Shdr); +template void OutputSectionBase::writeHeaderTo<ELF64BE>(ELF64BE::Shdr *Shdr); + +template class OutputSection<ELF32LE>; +template class OutputSection<ELF32BE>; +template class OutputSection<ELF64LE>; +template class OutputSection<ELF64BE>; + +template class EhOutputSection<ELF32LE>; +template class EhOutputSection<ELF32BE>; +template class EhOutputSection<ELF64LE>; +template class EhOutputSection<ELF64BE>; + +template class MergeOutputSection<ELF32LE>; +template class MergeOutputSection<ELF32BE>; +template class MergeOutputSection<ELF64LE>; +template class MergeOutputSection<ELF64BE>; + +template class OutputSectionFactory<ELF32LE>; +template class OutputSectionFactory<ELF32BE>; +template class OutputSectionFactory<ELF64LE>; +template class OutputSectionFactory<ELF64BE>; +} +} diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.h b/contrib/llvm/tools/lld/ELF/OutputSections.h new file mode 100644 index 000000000000..45e1a232e2a9 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/OutputSections.h @@ -0,0 +1,268 @@ +//===- OutputSections.h 
-----------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_OUTPUT_SECTIONS_H +#define LLD_ELF_OUTPUT_SECTIONS_H + +#include "Config.h" +#include "Relocations.h" + +#include "lld/Core/LLVM.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ELF.h" + +namespace lld { +namespace elf { + +struct PhdrEntry; +class SymbolBody; +struct EhSectionPiece; +template <class ELFT> class EhInputSection; +template <class ELFT> class InputSection; +template <class ELFT> class InputSectionBase; +template <class ELFT> class MergeInputSection; +template <class ELFT> class OutputSection; +template <class ELFT> class ObjectFile; +template <class ELFT> class SharedFile; +template <class ELFT> class SharedSymbol; +template <class ELFT> class DefinedRegular; + +// This represents a section in an output file. +// Different sub classes represent different types of sections. Some contain +// input sections, others are created by the linker. +// The writer creates multiple OutputSections and assign them unique, +// non-overlapping file offsets and VAs. 
+class OutputSectionBase { +public: + enum Kind { + Base, + EHFrame, + Merge, + Regular, + }; + + OutputSectionBase(StringRef Name, uint32_t Type, uint64_t Flags); + void setLMAOffset(uint64_t LMAOff) { LMAOffset = LMAOff; } + uint64_t getLMA() const { return Addr + LMAOffset; } + template <typename ELFT> void writeHeaderTo(typename ELFT::Shdr *SHdr); + StringRef getName() const { return Name; } + + virtual void addSection(InputSectionData *C) {} + virtual Kind getKind() const { return Base; } + static bool classof(const OutputSectionBase *B) { + return B->getKind() == Base; + } + + unsigned SectionIndex; + + uint32_t getPhdrFlags() const; + + void updateAlignment(uint64_t Alignment) { + if (Alignment > Addralign) + Addralign = Alignment; + } + + // If true, this section will be page aligned on disk. + // Typically the first section of each PT_LOAD segment has this flag. + bool PageAlign = false; + + // Pointer to the first section in PT_LOAD segment, which this section + // also resides in. This field is used to correctly compute file offset + // of a section. When two sections share the same load segment, difference + // between their file offsets should be equal to difference between their + // virtual addresses. To compute some section offset we use the following + // formula: Off = Off_first + VA - VA_first. + OutputSectionBase *FirstInPtLoad = nullptr; + + virtual void finalize() {} + virtual void assignOffsets() {} + virtual void writeTo(uint8_t *Buf) {} + virtual ~OutputSectionBase() = default; + + StringRef Name; + + // The following fields correspond to Elf_Shdr members. 
+ uint64_t Size = 0; + uint64_t Entsize = 0; + uint64_t Addralign = 0; + uint64_t Offset = 0; + uint64_t Flags = 0; + uint64_t LMAOffset = 0; + uint64_t Addr = 0; + uint32_t ShName = 0; + uint32_t Type = 0; + uint32_t Info = 0; + uint32_t Link = 0; +}; + +template <class ELFT> class OutputSection final : public OutputSectionBase { + +public: + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::uint uintX_t; + OutputSection(StringRef Name, uint32_t Type, uintX_t Flags); + void addSection(InputSectionData *C) override; + void sort(std::function<int(InputSection<ELFT> *S)> Order); + void sortInitFini(); + void sortCtorsDtors(); + void writeTo(uint8_t *Buf) override; + void finalize() override; + void assignOffsets() override; + Kind getKind() const override { return Regular; } + static bool classof(const OutputSectionBase *B) { + return B->getKind() == Regular; + } + std::vector<InputSection<ELFT> *> Sections; + + // Location in the output buffer. + uint8_t *Loc = nullptr; +}; + +template <class ELFT> +class MergeOutputSection final : public OutputSectionBase { + typedef typename ELFT::uint uintX_t; + +public: + MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags, + uintX_t Alignment); + void addSection(InputSectionData *S) override; + void writeTo(uint8_t *Buf) override; + void finalize() override; + bool shouldTailMerge() const; + Kind getKind() const override { return Merge; } + static bool classof(const OutputSectionBase *B) { + return B->getKind() == Merge; + } + +private: + void finalizeTailMerge(); + void finalizeNoTailMerge(); + + llvm::StringTableBuilder Builder; + std::vector<MergeInputSection<ELFT> *> Sections; +}; + +struct CieRecord { + EhSectionPiece *Piece = nullptr; + std::vector<EhSectionPiece *> FdePieces; +}; + +// Output section for .eh_frame. 
+template <class ELFT> class EhOutputSection final : public OutputSectionBase { + typedef typename ELFT::uint uintX_t; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + +public: + EhOutputSection(); + void writeTo(uint8_t *Buf) override; + void finalize() override; + bool empty() const { return Sections.empty(); } + + void addSection(InputSectionData *S) override; + Kind getKind() const override { return EHFrame; } + static bool classof(const OutputSectionBase *B) { + return B->getKind() == EHFrame; + } + + size_t NumFdes = 0; + +private: + template <class RelTy> + void addSectionAux(EhInputSection<ELFT> *S, llvm::ArrayRef<RelTy> Rels); + + template <class RelTy> + CieRecord *addCie(EhSectionPiece &Piece, ArrayRef<RelTy> Rels); + + template <class RelTy> + bool isFdeLive(EhSectionPiece &Piece, ArrayRef<RelTy> Rels); + + uintX_t getFdePc(uint8_t *Buf, size_t Off, uint8_t Enc); + + std::vector<EhInputSection<ELFT> *> Sections; + std::vector<CieRecord *> Cies; + + // CIE records are uniquified by their contents and personality functions. + llvm::DenseMap<std::pair<ArrayRef<uint8_t>, SymbolBody *>, CieRecord> CieMap; +}; + +// All output sections that are hadnled by the linker specially are +// globally accessible. Writer initializes them, so don't use them +// until Writer is initialized. 
+template <class ELFT> struct Out { + typedef typename ELFT::uint uintX_t; + typedef typename ELFT::Phdr Elf_Phdr; + + static uint8_t First; + static EhOutputSection<ELFT> *EhFrame; + static OutputSection<ELFT> *Bss; + static OutputSectionBase *Opd; + static uint8_t *OpdBuf; + static PhdrEntry *TlsPhdr; + static OutputSectionBase *DebugInfo; + static OutputSectionBase *ElfHeader; + static OutputSectionBase *ProgramHeaders; + static OutputSectionBase *PreinitArray; + static OutputSectionBase *InitArray; + static OutputSectionBase *FiniArray; +}; + +struct SectionKey { + StringRef Name; + uint64_t Flags; + uint64_t Alignment; +}; + +// This class knows how to create an output section for a given +// input section. Output section type is determined by various +// factors, including input section's sh_flags, sh_type and +// linker scripts. +template <class ELFT> class OutputSectionFactory { + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::uint uintX_t; + +public: + OutputSectionFactory(); + ~OutputSectionFactory(); + std::pair<OutputSectionBase *, bool> create(InputSectionBase<ELFT> *C, + StringRef OutsecName); + std::pair<OutputSectionBase *, bool> create(const SectionKey &Key, + InputSectionBase<ELFT> *C); + +private: + llvm::SmallDenseMap<SectionKey, OutputSectionBase *> Map; +}; + +template <class ELFT> uint64_t getHeaderSize() { + if (Config->OFormatBinary) + return 0; + return Out<ELFT>::ElfHeader->Size + Out<ELFT>::ProgramHeaders->Size; +} + +template <class ELFT> uint8_t Out<ELFT>::First; +template <class ELFT> EhOutputSection<ELFT> *Out<ELFT>::EhFrame; +template <class ELFT> OutputSection<ELFT> *Out<ELFT>::Bss; +template <class ELFT> OutputSectionBase *Out<ELFT>::Opd; +template <class ELFT> uint8_t *Out<ELFT>::OpdBuf; +template <class ELFT> PhdrEntry *Out<ELFT>::TlsPhdr; +template <class ELFT> OutputSectionBase *Out<ELFT>::DebugInfo; +template <class ELFT> OutputSectionBase *Out<ELFT>::ElfHeader; +template <class ELFT> OutputSectionBase 
*Out<ELFT>::ProgramHeaders; +template <class ELFT> OutputSectionBase *Out<ELFT>::PreinitArray; +template <class ELFT> OutputSectionBase *Out<ELFT>::InitArray; +template <class ELFT> OutputSectionBase *Out<ELFT>::FiniArray; +} // namespace elf +} // namespace lld + + +#endif diff --git a/contrib/llvm/tools/lld/ELF/README.md b/contrib/llvm/tools/lld/ELF/README.md new file mode 100644 index 000000000000..f1bfc9c15263 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/README.md @@ -0,0 +1 @@ +See docs/NewLLD.rst diff --git a/contrib/llvm/tools/lld/ELF/Relocations.cpp b/contrib/llvm/tools/lld/ELF/Relocations.cpp new file mode 100644 index 000000000000..f7dcc5d24e93 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Relocations.cpp @@ -0,0 +1,825 @@ +//===- Relocations.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains platform-independent functions to process relocations. +// I'll describe the overview of this file here. +// +// Simple relocations are easy to handle for the linker. For example, +// for R_X86_64_PC64 relocs, the linker just has to fix up locations +// with the relative offsets to the target symbols. It would just be +// reading records from relocation sections and applying them to output. +// +// But not all relocations are that easy to handle. For example, for +// R_386_GOTOFF relocs, the linker has to create new GOT entries for +// symbols if they don't exist, and fix up locations with GOT entry +// offsets from the beginning of GOT section. So there is more than +// fixing addresses in relocation processing. +// +// ELF defines a large number of complex relocations. +// +// The functions in this file analyze relocations and do whatever needs +// to be done. 
// This includes, but is not limited to, the following:
//
//  - create GOT/PLT entries
//  - create new relocations in .dynsym to let the dynamic linker resolve
//    them at runtime (since ELF supports dynamic linking, not all
//    relocations can be resolved at link-time)
//  - create COPY relocs and reserve space in .bss
//  - replace expensive relocs (in terms of runtime cost) with cheap ones
//  - error out infeasible combinations such as PIC and non-relative relocs
//
// Note that the functions in this file don't actually apply relocations
// because it doesn't know about the output file nor the output file buffer.
// It instead stores Relocation objects to InputSection's Relocations
// vector to let it apply later in InputSection::writeTo.
//
//===----------------------------------------------------------------------===//

#include "Relocations.h"
#include "Config.h"
#include "OutputSections.h"
#include "Strings.h"
#include "SymbolTable.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Thunks.h"

#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::support::endian;

namespace lld {
namespace elf {

// Returns true if Expr is one of the GOT-related expression kinds listed in
// the template argument list below.
static bool refersToGotEntry(RelExpr Expr) {
  return isRelExprOneOf<R_GOT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF,
                        R_MIPS_GOT_OFF32, R_MIPS_TLSGD, R_MIPS_TLSLD,
                        R_GOT_PAGE_PC, R_GOT_PC, R_GOT_FROM_END, R_TLSGD,
                        R_TLSGD_PC, R_TLSDESC, R_TLSDESC_PAGE>(Expr);
}

// Returns whether a relocation of type Type against Body has to be treated as
// a reference to a preemptible symbol. This is Body.isPreemptible() except
// for the MIPS R_MIPS_GPREL16 case described below, which is never
// considered preemptible.
static bool isPreemptible(const SymbolBody &Body, uint32_t Type) {
  // In case of MIPS GP-relative relocations always resolve to a definition
  // in a regular input file, ignoring the one-definition rule. So we,
  // for example, should not attempt to create a dynamic relocation even
  // if the target symbol is preemptible. There are two MIPS GP-relative
  // relocations R_MIPS_GPREL16 and R_MIPS_GPREL32. But only R_MIPS_GPREL16
  // can be against a preemptible symbol.
  // To get MIPS relocation type we apply 0xff mask. In case of O32 ABI all
  // relocation types occupy eight bit. In case of N64 ABI we extract first
  // relocation from 3-in-1 packet because only the first relocation can
  // be against a real symbol.
  if (Config->EMachine == EM_MIPS && (Type & 0xff) == R_MIPS_GPREL16)
    return false;
  return Body.isPreemptible();
}

// This function is similar to the `handleTlsRelocation`. ARM and MIPS do not
// support any relaxations for TLS relocations so by factoring out ARM and MIPS
// handling in to the separate function we can simplify the code and do not
// pollute `handleTlsRelocation` by ARM and MIPS `ifs` statements.
//
// Got is the GOT section object to operate on; the caller selects it.
// Returns 1 if the relocation was recognised and recorded in C.Relocations,
// and 0 if it was not handled here.
template <class ELFT, class GOT>
static unsigned handleNoRelaxTlsRelocation(
    GOT *Got, uint32_t Type, SymbolBody &Body, InputSectionBase<ELFT> &C,
    typename ELFT::uint Offset, typename ELFT::uint Addend, RelExpr Expr) {
  typedef typename ELFT::uint uintX_t;
  // Emits the module-index relocation for the GOT slot at offset Off. LD is
  // true for the local-dynamic case, where the dynamic relocation is emitted
  // without a target symbol.
  auto addModuleReloc = [](SymbolBody &Body, GOT *Got, uintX_t Off, bool LD) {
    // The Dynamic TLS Module Index Relocation can be statically resolved to 1
    // if we know that we are linking an executable. For ARM we resolve the
    // relocation when writing the Got. MIPS has a custom Got implementation
    // that writes the Module index in directly.
    if (!Body.isPreemptible() && !Config->Pic && Config->EMachine == EM_ARM)
      Got->Relocations.push_back(
          {R_ABS, Target->TlsModuleIndexRel, Off, 0, &Body});
    else {
      SymbolBody *Dest = LD ? nullptr : &Body;
      In<ELFT>::RelaDyn->addReloc(
          {Target->TlsModuleIndexRel, Got, Off, false, Dest, 0});
    }
  };
  // Local-dynamic: the module-index relocation is added only when
  // addTlsIndex() returns true.
  if (Expr == R_MIPS_TLSLD || Expr == R_TLSLD_PC) {
    if (Got->addTlsIndex() && (Config->Pic || Config->EMachine == EM_ARM))
      addModuleReloc(Body, Got, Got->getTlsIndexOff(), true);
    C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
    return 1;
  }
  // Global-dynamic: a module-index relocation at Off and, for preemptible
  // symbols, an offset relocation in the word that follows it.
  if (Target->isTlsGlobalDynamicRel(Type)) {
    if (Got->addDynTlsEntry(Body) &&
        (Body.isPreemptible() || Config->EMachine == EM_ARM)) {
      uintX_t Off = Got->getGlobalDynOffset(Body);
      addModuleReloc(Body, Got, Off, false);
      if (Body.isPreemptible())
        In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Got,
                                     Off + (uintX_t)sizeof(uintX_t), false,
                                     &Body, 0});
    }
    C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
    return 1;
  }
  return 0;
}

// Returns the number of relocations processed.
//
// Returns 0 when nothing is done here: the section is not SHF_ALLOC, Body is
// not a TLS symbol, or no TLS case below matches. The order of the checks
// below matters; several of them test overlapping conditions.
template <class ELFT>
static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body,
                                    InputSectionBase<ELFT> &C,
                                    typename ELFT::uint Offset,
                                    typename ELFT::uint Addend, RelExpr Expr) {
  if (!(C.Flags & SHF_ALLOC))
    return 0;

  if (!Body.isTls())
    return 0;

  typedef typename ELFT::uint uintX_t;

  // ARM and MIPS never relax TLS relocations; they differ only in which GOT
  // object is passed to the shared helper.
  if (Config->EMachine == EM_ARM)
    return handleNoRelaxTlsRelocation<ELFT>(In<ELFT>::Got, Type, Body, C,
                                            Offset, Addend, Expr);
  if (Config->EMachine == EM_MIPS)
    return handleNoRelaxTlsRelocation<ELFT>(In<ELFT>::MipsGot, Type, Body, C,
                                            Offset, Addend, Expr);

  bool IsPreemptible = isPreemptible(Body, Type);
  // TLS descriptors when building a shared object: one dynamic relocation per
  // symbol; R_TLSDESC_CALL itself is not recorded in C.Relocations.
  if ((Expr == R_TLSDESC || Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC_CALL) &&
      Config->Shared) {
    if (In<ELFT>::Got->addDynTlsEntry(Body)) {
      uintX_t Off = In<ELFT>::Got->getGlobalDynOffset(Body);
      In<ELFT>::RelaDyn->addReloc({Target->TlsDescRel, In<ELFT>::Got, Off,
                                   !IsPreemptible, &Body, 0});
    }
    if (Expr != R_TLSDESC_CALL)
      C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
    return 1;
  }

  if (Expr == R_TLSLD_PC || Expr == R_TLSLD) {
    // Local-Dynamic relocs can be relaxed to Local-Exec.
    // Note that this path reports two relocations as processed.
    if (!Config->Shared) {
      C.Relocations.push_back(
          {R_RELAX_TLS_LD_TO_LE, Type, Offset, Addend, &Body});
      return 2;
    }
    if (In<ELFT>::Got->addTlsIndex())
      In<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, In<ELFT>::Got,
                                   In<ELFT>::Got->getTlsIndexOff(), false,
                                   nullptr, 0});
    C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
    return 1;
  }

  // Local-Dynamic relocs can be relaxed to Local-Exec.
  if (Target->isTlsLocalDynamicRel(Type) && !Config->Shared) {
    C.Relocations.push_back(
        {R_RELAX_TLS_LD_TO_LE, Type, Offset, Addend, &Body});
    return 1;
  }

  if (Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC || Expr == R_TLSDESC_CALL ||
      Target->isTlsGlobalDynamicRel(Type)) {
    if (Config->Shared) {
      if (In<ELFT>::Got->addDynTlsEntry(Body)) {
        uintX_t Off = In<ELFT>::Got->getGlobalDynOffset(Body);
        In<ELFT>::RelaDyn->addReloc(
            {Target->TlsModuleIndexRel, In<ELFT>::Got, Off, false, &Body, 0});

        // If the symbol is preemptible we need the dynamic linker to write
        // the offset too.
        uintX_t OffsetOff = Off + (uintX_t)sizeof(uintX_t);
        if (IsPreemptible)
          In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, In<ELFT>::Got,
                                       OffsetOff, false, &Body, 0});
        else
          In<ELFT>::Got->Relocations.push_back(
              {R_ABS, Target->TlsOffsetRel, OffsetOff, 0, &Body});
      }
      C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
      return 1;
    }

    // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
    // depending on the symbol being locally defined or not.
    // Both relaxations report Target->TlsGdRelaxSkip relocations as processed.
    if (IsPreemptible) {
      C.Relocations.push_back(
          {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_IE), Type,
           Offset, Addend, &Body});
      if (!Body.isInGot()) {
        In<ELFT>::Got->addEntry(Body);
        In<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, In<ELFT>::Got,
                                     Body.getGotOffset<ELFT>(), false, &Body,
                                     0});
      }
      return Target->TlsGdRelaxSkip;
    }
    C.Relocations.push_back(
        {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type,
         Offset, Addend, &Body});
    return Target->TlsGdRelaxSkip;
  }

  // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally
  // defined.
  if (Target->isTlsInitialExecRel(Type) && !Config->Shared && !IsPreemptible) {
    C.Relocations.push_back(
        {R_RELAX_TLS_IE_TO_LE, Type, Offset, Addend, &Body});
    return 1;
  }
  return 0;
}

// Reads a 32-bit word at Loc with endianness E and returns its low 16 bits
// converted to a signed 16-bit value.
template <endianness E> static int16_t readSignedLo16(const uint8_t *Loc) {
  return read32<E>(Loc) & 0xffff;
}

// Returns the relocation type that supplies the second half of the addend for
// Rel, or R_MIPS_NONE if Rel has no paired relocation. For R_MIPS_GOT16 the
// answer depends on whether Sym is local.
template <class RelTy>
static uint32_t getMipsPairType(const RelTy *Rel, const SymbolBody &Sym) {
  switch (Rel->getType(Config->Mips64EL)) {
  case R_MIPS_HI16:
    return R_MIPS_LO16;
  case R_MIPS_GOT16:
    return Sym.isLocal() ? R_MIPS_LO16 : R_MIPS_NONE;
  case R_MIPS_PCHI16:
    return R_MIPS_PCLO16;
  case R_MICROMIPS_HI16:
    return R_MICROMIPS_LO16;
  default:
    return R_MIPS_NONE;
  }
}

// Computes the combined addend of Rel and its paired relocation.
//
// Buf is the start of the section data and BufLoc the location Rel applies
// to. The range [Rel, End) is searched for the first relocation that has the
// paired type and refers to the same symbol index. Returns 0 for RELA
// records, for relocations without a pair, and (after a warning) when no
// matching relocation is found.
template <class ELFT, class RelTy>
static int32_t findMipsPairedAddend(const uint8_t *Buf, const uint8_t *BufLoc,
                                    SymbolBody &Sym, const RelTy *Rel,
                                    const RelTy *End) {
  uint32_t SymIndex = Rel->getSymbol(Config->Mips64EL);
  uint32_t Type = getMipsPairType(Rel, Sym);

  // Some MIPS relocations use addend calculated from addend of the relocation
  // itself and addend of paired relocation. ABI requires to compute such
  // combined addend in case of REL relocation record format only.
  // See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
  if (RelTy::IsRela || Type == R_MIPS_NONE)
    return 0;

  for (const RelTy *RI = Rel; RI != End; ++RI) {
    if (RI->getType(Config->Mips64EL) != Type)
      continue;
    if (RI->getSymbol(Config->Mips64EL) != SymIndex)
      continue;
    const endianness E = ELFT::TargetEndianness;
    // High half: low 16 bits of the word at BufLoc, shifted up by 16.
    // Low half: signed low 16 bits of the word at the paired relocation.
    return ((read32<E>(BufLoc) & 0xffff) << 16) +
           readSignedLo16<E>(Buf + RI->r_offset);
  }
  warn("can't find matching " + toString(Type) + " relocation for " +
       toString(Rel->getType(Config->Mips64EL)));
  return 0;
}

// True if non-preemptable symbol always has the same value regardless of where
// the DSO is loaded.
template <class ELFT> static bool isAbsolute(const SymbolBody &Body) {
  // An undefined symbol counts as absolute only if it is a non-local weak.
  if (Body.isUndefined())
    return !Body.isLocal() && Body.symbol()->isWeak();
  if (const auto *DR = dyn_cast<DefinedRegular<ELFT>>(&Body))
    return DR->Section == nullptr; // Absolute symbol.
  return false;
}

// True if Body is absolute in the sense of isAbsolute() or is a TLS symbol.
template <class ELFT> static bool isAbsoluteValue(const SymbolBody &Body) {
  return isAbsolute<ELFT>(Body) || Body.isTls();
}

// True if Expr is one of the PLT-based expression kinds listed below.
static bool needsPlt(RelExpr Expr) {
  return isRelExprOneOf<R_PLT_PC, R_PPC_PLT_OPD, R_PLT, R_PLT_PAGE_PC,
                        R_THUNK_PLT_PC>(Expr);
}

// True if this expression is of the form Sym - X, where X is a position in the
// file (PC, or GOT for example).
+static bool isRelExpr(RelExpr Expr) { + return isRelExprOneOf<R_PC, R_GOTREL, R_GOTREL_FROM_END, R_MIPS_GOTREL, + R_PAGE_PC, R_RELAX_GOT_PC, R_THUNK_PC, R_THUNK_PLT_PC>( + Expr); +} + +template <class ELFT> +static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type, + const SymbolBody &Body, + InputSectionBase<ELFT> &S, + typename ELFT::uint RelOff) { + // These expressions always compute a constant + if (isRelExprOneOf<R_SIZE, R_GOT_FROM_END, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, + R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_TLSGD, + R_GOT_PAGE_PC, R_GOT_PC, R_PLT_PC, R_TLSGD_PC, R_TLSGD, + R_PPC_PLT_OPD, R_TLSDESC_CALL, R_TLSDESC_PAGE, R_HINT, + R_THUNK_PC, R_THUNK_PLT_PC>(E)) + return true; + + // These never do, except if the entire file is position dependent or if + // only the low bits are used. + if (E == R_GOT || E == R_PLT || E == R_TLSDESC) + return Target->usesOnlyLowPageBits(Type) || !Config->Pic; + + if (isPreemptible(Body, Type)) + return false; + + if (!Config->Pic) + return true; + + bool AbsVal = isAbsoluteValue<ELFT>(Body); + bool RelE = isRelExpr(E); + if (AbsVal && !RelE) + return true; + if (!AbsVal && RelE) + return true; + + // Relative relocation to an absolute value. This is normally unrepresentable, + // but if the relocation refers to a weak undefined symbol, we allow it to + // resolve to the image base. This is a little strange, but it allows us to + // link function calls to such symbols. Normally such a call will be guarded + // with a comparison, which will load a zero from the GOT. + // Another special case is MIPS _gp_disp symbol which represents offset + // between start of a function and '_gp' value and defined as absolute just + // to simplify the code. 
+ if (AbsVal && RelE) { + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) + return true; + if (&Body == ElfSym<ELFT>::MipsGpDisp) + return true; + error(S.getLocation(RelOff) + ": relocation " + toString(Type) + + " cannot refer to absolute symbol '" + toString(Body) + + "' defined in " + toString(Body.File)); + return true; + } + + return Target->usesOnlyLowPageBits(Type); +} + +static RelExpr toPlt(RelExpr Expr) { + if (Expr == R_PPC_OPD) + return R_PPC_PLT_OPD; + if (Expr == R_PC) + return R_PLT_PC; + if (Expr == R_PAGE_PC) + return R_PLT_PAGE_PC; + if (Expr == R_ABS) + return R_PLT; + return Expr; +} + +static RelExpr fromPlt(RelExpr Expr) { + // We decided not to use a plt. Optimize a reference to the plt to a + // reference to the symbol itself. + if (Expr == R_PLT_PC) + return R_PC; + if (Expr == R_PPC_PLT_OPD) + return R_PPC_OPD; + if (Expr == R_PLT) + return R_ABS; + return Expr; +} + +template <class ELFT> static uint32_t getAlignment(SharedSymbol<ELFT> *SS) { + typedef typename ELFT::uint uintX_t; + + uintX_t SecAlign = SS->file()->getSection(SS->Sym)->sh_addralign; + uintX_t SymValue = SS->Sym.st_value; + int TrailingZeros = + std::min(countTrailingZeros(SecAlign), countTrailingZeros(SymValue)); + return 1 << TrailingZeros; +} + +// Reserve space in .bss for copy relocation. +template <class ELFT> static void addCopyRelSymbol(SharedSymbol<ELFT> *SS) { + typedef typename ELFT::uint uintX_t; + typedef typename ELFT::Sym Elf_Sym; + + // Copy relocation against zero-sized symbol doesn't make sense. 
+ uintX_t SymSize = SS->template getSize<ELFT>(); + if (SymSize == 0) + fatal("cannot create a copy relocation for symbol " + toString(*SS)); + + uintX_t Alignment = getAlignment(SS); + uintX_t Off = alignTo(Out<ELFT>::Bss->Size, Alignment); + Out<ELFT>::Bss->Size = Off + SymSize; + Out<ELFT>::Bss->updateAlignment(Alignment); + uintX_t Shndx = SS->Sym.st_shndx; + uintX_t Value = SS->Sym.st_value; + // Look through the DSO's dynamic symbol table for aliases and create a + // dynamic symbol for each one. This causes the copy relocation to correctly + // interpose any aliases. + for (const Elf_Sym &S : SS->file()->getGlobalSymbols()) { + if (S.st_shndx != Shndx || S.st_value != Value) + continue; + auto *Alias = dyn_cast_or_null<SharedSymbol<ELFT>>( + Symtab<ELFT>::X->find(check(S.getName(SS->file()->getStringTable())))); + if (!Alias) + continue; + Alias->OffsetInBss = Off; + Alias->NeedsCopyOrPltAddr = true; + Alias->symbol()->IsUsedInRegularObj = true; + } + In<ELFT>::RelaDyn->addReloc( + {Target->CopyRel, Out<ELFT>::Bss, SS->OffsetInBss, false, SS, 0}); +} + +// Refines Expr, the expression the target chose for a relocation of type Type +// against Body at offset RelOff in section S. IFunc symbols are routed through +// the PLT; for non-preemptible symbols a PLT expression is converted back with +// fromPlt() and R_GOT_PC against a non-absolute symbol is offered to the target +// for relaxation. The result then goes through Target->getThunkExpr(). If the +// section is writable (IsWrite) or the value is a static link-time constant, +// that expression is returned as is. Otherwise this either reports an error, +// creates a copy relocation (object symbols), or marks the symbol as needing a +// PLT address and returns the PLT form of the expression (function symbols). +template <class ELFT> +static RelExpr adjustExpr(const elf::ObjectFile<ELFT> &File, SymbolBody &Body, + bool IsWrite, RelExpr Expr, uint32_t Type, + const uint8_t *Data, InputSectionBase<ELFT> &S, + typename ELFT::uint RelOff) { + bool Preemptible = isPreemptible(Body, Type); + if (Body.isGnuIFunc()) { + Expr = toPlt(Expr); + } else if (!Preemptible) { + if (needsPlt(Expr)) + Expr = fromPlt(Expr); + if (Expr == R_GOT_PC && !isAbsoluteValue<ELFT>(Body)) + Expr = Target->adjustRelaxExpr(Type, Data, Expr); + } + Expr = Target->getThunkExpr(Expr, Type, File, Body); + + if (IsWrite || isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, S, RelOff)) + return Expr; + + // This relocation would require the dynamic linker to write a value to read + // only memory. We can hack around it if we are producing an executable and + // the referred symbol can be preempted to refer to the executable. 
+ if (Config->Shared || (Config->Pic && !isRelExpr(Expr))) { + error(S.getLocation(RelOff) + ": can't create dynamic relocation " + + toString(Type) + " against " + + (Body.getName().empty() ? "local symbol in readonly segment" + : "symbol '" + toString(Body) + "'") + + " defined in " + toString(Body.File)); + return Expr; + } + if (Body.getVisibility() != STV_DEFAULT) { + error(S.getLocation(RelOff) + ": cannot preempt symbol '" + toString(Body) + + "' defined in " + toString(Body.File)); + return Expr; + } + if (Body.isObject()) { + // Produce a copy relocation. + auto *B = cast<SharedSymbol<ELFT>>(&Body); + if (!B->needsCopy()) + addCopyRelSymbol(B); + return Expr; + } + if (Body.isFunc()) { + // This handles a non PIC program call to a function in a shared library. In + // an ideal world, we could just report an error saying the relocation can + // overflow at runtime. In the real world with glibc, crt1.o has a + // R_X86_64_PC32 pointing to libc.so. + // + // The general idea on how to handle such cases is to create a PLT entry and + // use that as the function value. + // + // For the static linking part, we just return a plt expr and everything + // else will use the PLT entry as the address. + // + // The remaining problem is making sure pointer equality still works. We + // need the help of the dynamic linker for that. We let it know that we have + // a direct reference to a so symbol by creating an undefined symbol with a + // non zero st_value. Seeing that, the dynamic linker resolves the symbol to + // the value of the symbol we created. This is true even for got entries, so + // pointer equality is maintained. To avoid an infinite loop, the only entry + // that points to the real function is a dedicated got entry used by the + // plt. That is identified by special relocation types (R_X86_64_JUMP_SLOT, + // R_386_JMP_SLOT, etc). 
+ Body.NeedsCopyOrPltAddr = true; + return toPlt(Expr); + } + error("symbol '" + toString(Body) + "' defined in " + toString(Body.File) + + " is missing type"); + + return Expr; +} + +template <class ELFT, class RelTy> +static typename ELFT::uint computeAddend(const elf::ObjectFile<ELFT> &File, + const uint8_t *SectionData, + const RelTy *End, const RelTy &RI, + RelExpr Expr, SymbolBody &Body) { + typedef typename ELFT::uint uintX_t; + + uint32_t Type = RI.getType(Config->Mips64EL); + uintX_t Addend = getAddend<ELFT>(RI); + const uint8_t *BufLoc = SectionData + RI.r_offset; + if (!RelTy::IsRela) + Addend += Target->getImplicitAddend(BufLoc, Type); + if (Config->EMachine == EM_MIPS) { + Addend += findMipsPairedAddend<ELFT>(SectionData, BufLoc, Body, &RI, End); + if (Type == R_MIPS_LO16 && Expr == R_PC) + // R_MIPS_LO16 expression has R_PC type iff the target is _gp_disp + // symbol. In that case we should use the following formula for + // calculation "AHL + GP - P + 4". Let's add 4 right here. + // For details see p. 
4-19 at + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + Addend += 4; + if (Expr == R_MIPS_GOTREL && Body.isLocal()) + Addend += File.MipsGp0; + } + if (Config->Pic && Config->EMachine == EM_PPC64 && Type == R_PPC64_TOC) + Addend += getPPC64TocBase(); + return Addend; +} + +// Reports an undefined reference to Sym found at Offset in section S. Nothing +// is reported when the unresolved-symbols policy is Ignore, or when linking a +// shared object against a default-visibility symbol unless the policy is +// NoUndef. Otherwise the message is a warning under the Warn policy and an +// error in all other cases. +template <class ELFT> +static void reportUndefined(SymbolBody &Sym, InputSectionBase<ELFT> &S, + typename ELFT::uint Offset) { + if (Config->UnresolvedSymbols == UnresolvedPolicy::Ignore) + return; + + if (Config->Shared && Sym.symbol()->Visibility == STV_DEFAULT && + Config->UnresolvedSymbols != UnresolvedPolicy::NoUndef) + return; + + std::string Msg = + S.getLocation(Offset) + ": undefined symbol '" + toString(Sym) + "'"; + + if (Config->UnresolvedSymbols == UnresolvedPolicy::Warn) + warn(Msg); + else + error(Msg); +} + +template <class RelTy> +static std::pair<uint32_t, uint32_t> +mergeMipsN32RelTypes(uint32_t Type, uint32_t Offset, RelTy *I, RelTy *E) { + // MIPS N32 ABI treats series of successive relocations with the same offset + // as a single relocation. A similar approach is used by the N64 ABI, but that ABI + // packs all relocations into a single relocation record. Here we emulate + // this for the N32 ABI. Iterate over relocations with the same offset and put + // their types into a single bit-set. + uint32_t Processed = 0; + for (; I != E && Offset == I->r_offset; ++I) { + ++Processed; + Type |= I->getType(Config->Mips64EL) << (8 * Processed); + } + return std::make_pair(Type, Processed); +} + +// The reason we have to do this early scan is as follows +// * To mmap the output file, we need to know the size +// * For that, we need to know how many dynamic relocs we will have. +// It might be possible to avoid this by outputting the file with write: +// * Write the allocated output sections, computing addresses. +// * Apply relocations, recording which ones require a dynamic reloc. +// * Write the dynamic relocations. +// * Write the rest of the file. 
+// This would have some drawbacks. For example, we would only know if .rela.dyn +// is needed after applying relocations. If it is, it will go after rw and rx +// sections. Given that it is ro, we will need an extra PT_LOAD. This +// complicates things for the dynamic linker and means we would have to reserve +// space for the extra PT_LOAD even if we end up not using it. +template <class ELFT, class RelTy> +static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { + typedef typename ELFT::uint uintX_t; + + bool IsWrite = C.Flags & SHF_WRITE; + + auto AddDyn = [=](const DynamicReloc<ELFT> &Reloc) { + In<ELFT>::RelaDyn->addReloc(Reloc); + }; + + const elf::ObjectFile<ELFT> *File = C.getFile(); + ArrayRef<uint8_t> SectionData = C.Data; + const uint8_t *Buf = SectionData.begin(); + + ArrayRef<EhSectionPiece> Pieces; + if (auto *Eh = dyn_cast<EhInputSection<ELFT>>(&C)) + Pieces = Eh->Pieces; + + ArrayRef<EhSectionPiece>::iterator PieceI = Pieces.begin(); + ArrayRef<EhSectionPiece>::iterator PieceE = Pieces.end(); + + for (auto I = Rels.begin(), E = Rels.end(); I != E; ++I) { + const RelTy &RI = *I; + SymbolBody &Body = File->getRelocTargetSym(RI); + uint32_t Type = RI.getType(Config->Mips64EL); + + if (Config->MipsN32Abi) { + uint32_t Processed; + std::tie(Type, Processed) = + mergeMipsN32RelTypes(Type, RI.r_offset, I + 1, E); + I += Processed; + } + + // We only report undefined symbols if they are referenced somewhere in the + // code. + if (!Body.isLocal() && Body.isUndefined() && !Body.symbol()->isWeak()) + reportUndefined(Body, C, RI.r_offset); + + RelExpr Expr = Target->getRelExpr(Type, Body); + bool Preemptible = isPreemptible(Body, Type); + Expr = adjustExpr(*File, Body, IsWrite, Expr, Type, Buf + RI.r_offset, C, + RI.r_offset); + if (ErrorCount) + continue; + + // Skip a relocation that points to a dead piece + // in an eh_frame section. 
+ while (PieceI != PieceE && + (PieceI->InputOff + PieceI->size() <= RI.r_offset)) + ++PieceI; + + // Compute the offset of this section in the output section. We do it here + // to try to compute it only once. + uintX_t Offset; + if (PieceI != PieceE) { + assert(PieceI->InputOff <= RI.r_offset && "Relocation not in any piece"); + if (PieceI->OutputOff == -1) + continue; + Offset = PieceI->OutputOff + RI.r_offset - PieceI->InputOff; + } else { + Offset = RI.r_offset; + } + + // This relocation does not require got entry, but it is relative to got and + // needs it to be created. Here we request that. + if (Expr == R_GOTONLY_PC || Expr == R_GOTONLY_PC_FROM_END || + Expr == R_GOTREL || Expr == R_GOTREL_FROM_END || Expr == R_PPC_TOC) + In<ELFT>::Got->HasGotOffRel = true; + + uintX_t Addend = computeAddend(*File, Buf, E, RI, Expr, Body); + + if (unsigned Processed = + handleTlsRelocation<ELFT>(Type, Body, C, Offset, Addend, Expr)) { + I += (Processed - 1); + continue; + } + + // Ignore "hint" and TLS Descriptor call relocation because they are + // only markers for relaxation. + if (isRelExprOneOf<R_HINT, R_TLSDESC_CALL>(Expr)) + continue; + + if (needsPlt(Expr) || + isRelExprOneOf<R_THUNK_ABS, R_THUNK_PC, R_THUNK_PLT_PC>(Expr) || + refersToGotEntry(Expr) || !isPreemptible(Body, Type)) { + // If the relocation points to something in the file, we can process it. + bool Constant = + isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, C, RI.r_offset); + + // If the output being produced is position independent, the final value + // is still not known. In that case we still need some help from the + // dynamic linker. We can however do better than just copying the incoming + // relocation. We can process some of it and just ask the dynamic + // linker to add the load address. + if (!Constant) + AddDyn({Target->RelativeRel, &C, Offset, true, &Body, Addend}); + + // If the produced value is a constant, we just remember to write it + // when outputting this section. 
We also have to do it if the format + // uses Elf_Rel, since in that case the written value is the addend. + if (Constant || !RelTy::IsRela) + C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); + } else { + // We don't know anything about the final symbol. Just ask the dynamic + // linker to handle the relocation for us. + if (!Target->isPicRel(Type)) + error(C.getLocation(Offset) + ": relocation " + toString(Type) + + " cannot be used against shared object; recompile with -fPIC."); + AddDyn({Target->getDynRel(Type), &C, Offset, false, &Body, Addend}); + + // MIPS ABI turns using of GOT and dynamic relocations inside out. + // While regular ABI uses dynamic relocations to fill up GOT entries + // MIPS ABI requires dynamic linker to fill up GOT entries using + // specially sorted dynamic symbol table. This affects even dynamic + // relocations against symbols which do not require GOT entries + // creation explicitly, i.e. do not have any GOT-relocations. So if + // a preemptible symbol has a dynamic relocation we anyway have + // to create a GOT entry for it. + // If a non-preemptible symbol has a dynamic relocation against it, + // dynamic linker takes its st_value, adds offset and writes down + // result of the dynamic relocation. In case of preemptible symbol + // dynamic linker performs symbol resolution, writes the symbol value + // to the GOT entry and reads the GOT entry when it needs to perform + // a dynamic relocation. + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf p.4-19 + if (Config->EMachine == EM_MIPS) + In<ELFT>::MipsGot->addEntry(Body, Addend, Expr); + continue; + } + + // At this point we are done with the relocated position. Some relocations + // also require us to create a got or plt entry. + + // If a relocation needs PLT, we create a PLT and a GOT slot for the symbol. 
+ if (needsPlt(Expr)) { + if (Body.isInPlt()) + continue; + + if (Body.isGnuIFunc() && !Preemptible) { + In<ELFT>::Iplt->addEntry(Body); + In<ELFT>::IgotPlt->addEntry(Body); + In<ELFT>::RelaIplt->addReloc({Target->IRelativeRel, In<ELFT>::IgotPlt, + Body.getGotPltOffset<ELFT>(), + !Preemptible, &Body, 0}); + } else { + In<ELFT>::Plt->addEntry(Body); + In<ELFT>::GotPlt->addEntry(Body); + In<ELFT>::RelaPlt->addReloc({Target->PltRel, In<ELFT>::GotPlt, + Body.getGotPltOffset<ELFT>(), !Preemptible, + &Body, 0}); + } + continue; + } + + if (refersToGotEntry(Expr)) { + if (Config->EMachine == EM_MIPS) { + // MIPS ABI has special rules to process GOT entries and doesn't + // require relocation entries for them. A special case is TLS + // relocations. In that case dynamic loader applies dynamic + // relocations to initialize TLS GOT entries. + // See "Global Offset Table" in Chapter 5 in the following document + // for detailed description: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + In<ELFT>::MipsGot->addEntry(Body, Addend, Expr); + if (Body.isTls() && Body.isPreemptible()) + AddDyn({Target->TlsGotRel, In<ELFT>::MipsGot, + Body.getGotOffset<ELFT>(), false, &Body, 0}); + continue; + } + + if (Body.isInGot()) + continue; + + In<ELFT>::Got->addEntry(Body); + uintX_t Off = Body.getGotOffset<ELFT>(); + uint32_t DynType; + RelExpr GotRE = R_ABS; + if (Body.isTls()) { + DynType = Target->TlsGotRel; + GotRE = R_TLS; + } else if (!Preemptible && Config->Pic && !isAbsolute<ELFT>(Body)) + DynType = Target->RelativeRel; + else + DynType = Target->GotRel; + + // FIXME: this logic is almost duplicated above. 
+ bool Constant = !Preemptible && !(Config->Pic && !isAbsolute<ELFT>(Body)); + if (!Constant) + AddDyn({DynType, In<ELFT>::Got, Off, !Preemptible, &Body, 0}); + if (Constant || (!RelTy::IsRela && !Preemptible)) + In<ELFT>::Got->Relocations.push_back({GotRE, DynType, Off, 0, &Body}); + continue; + } + } +} + +template <class ELFT> void scanRelocations(InputSectionBase<ELFT> &S) { + if (S.AreRelocsRela) + scanRelocs(S, S.relas()); + else + scanRelocs(S, S.rels()); +} + +template <class ELFT, class RelTy> +static void createThunks(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { + const elf::ObjectFile<ELFT> *File = C.getFile(); + for (const RelTy &Rel : Rels) { + SymbolBody &Body = File->getRelocTargetSym(Rel); + uint32_t Type = Rel.getType(Config->Mips64EL); + RelExpr Expr = Target->getRelExpr(Type, Body); + if (!isPreemptible(Body, Type) && needsPlt(Expr)) + Expr = fromPlt(Expr); + Expr = Target->getThunkExpr(Expr, Type, *File, Body); + // Some targets might require creation of thunks for relocations. + // Now we support only MIPS which requires LA25 thunk to call PIC + // code from non-PIC one, and ARM which requires interworking. 
+ if (Expr == R_THUNK_ABS || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC) { + auto *Sec = cast<InputSection<ELFT>>(&C); + addThunk<ELFT>(Type, Body, *Sec); + } + } +} + +template <class ELFT> void createThunks(InputSectionBase<ELFT> &S) { + if (S.AreRelocsRela) + createThunks(S, S.relas()); + else + createThunks(S, S.rels()); +} + +template void scanRelocations<ELF32LE>(InputSectionBase<ELF32LE> &); +template void scanRelocations<ELF32BE>(InputSectionBase<ELF32BE> &); +template void scanRelocations<ELF64LE>(InputSectionBase<ELF64LE> &); +template void scanRelocations<ELF64BE>(InputSectionBase<ELF64BE> &); + +template void createThunks<ELF32LE>(InputSectionBase<ELF32LE> &); +template void createThunks<ELF32BE>(InputSectionBase<ELF32BE> &); +template void createThunks<ELF64LE>(InputSectionBase<ELF64LE> &); +template void createThunks<ELF64BE>(InputSectionBase<ELF64BE> &); +} +} diff --git a/contrib/llvm/tools/lld/ELF/Relocations.h b/contrib/llvm/tools/lld/ELF/Relocations.h new file mode 100644 index 000000000000..b5825bdd5e59 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Relocations.h @@ -0,0 +1,130 @@ +//===- Relocations.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_RELOCATIONS_H +#define LLD_ELF_RELOCATIONS_H + +#include "lld/Core/LLVM.h" + +namespace lld { +namespace elf { +class SymbolBody; +class InputSectionData; +template <class ELFT> class InputSection; +template <class ELFT> class InputSectionBase; + +// List of target-independent relocation types. Relocations read +// from files are converted to these types so that the main code +// doesn't have to know about architecture-specific details. 
+enum RelExpr { + R_ABS, + R_GOT, + R_GOTONLY_PC, + R_GOTONLY_PC_FROM_END, + R_GOTREL, + R_GOTREL_FROM_END, + R_GOT_FROM_END, + R_GOT_OFF, + R_GOT_PAGE_PC, + R_GOT_PC, + R_HINT, + R_MIPS_GOT_LOCAL_PAGE, + R_MIPS_GOT_OFF, + R_MIPS_GOT_OFF32, + R_MIPS_GOTREL, + R_MIPS_TLSGD, + R_MIPS_TLSLD, + R_NEG_TLS, + R_PAGE_PC, + R_PC, + R_PLT, + R_PLT_PC, + R_PLT_PAGE_PC, + R_PPC_OPD, + R_PPC_PLT_OPD, + R_PPC_TOC, + R_RELAX_GOT_PC, + R_RELAX_GOT_PC_NOPIC, + R_RELAX_TLS_GD_TO_IE, + R_RELAX_TLS_GD_TO_IE_END, + R_RELAX_TLS_GD_TO_IE_ABS, + R_RELAX_TLS_GD_TO_IE_PAGE_PC, + R_RELAX_TLS_GD_TO_LE, + R_RELAX_TLS_GD_TO_LE_NEG, + R_RELAX_TLS_IE_TO_LE, + R_RELAX_TLS_LD_TO_LE, + R_SIZE, + R_THUNK_ABS, + R_THUNK_PC, + R_THUNK_PLT_PC, + R_TLS, + R_TLSDESC, + R_TLSDESC_PAGE, + R_TLSDESC_CALL, + R_TLSGD, + R_TLSGD_PC, + R_TLSLD, + R_TLSLD_PC, +}; + +// Build a bitmask with one bit set for each RelExpr. +// +// Constexpr function arguments can't be used in static asserts, so we +// use template arguments to build the mask. +// But function template partial specializations don't exist (needed +// for base case of the recursion), so we need a dummy struct. +template <RelExpr... Exprs> struct RelExprMaskBuilder { + static inline uint64_t build() { return 0; } +}; + +// Specialization for recursive case. +template <RelExpr Head, RelExpr... Tail> +struct RelExprMaskBuilder<Head, Tail...> { + static inline uint64_t build() { + static_assert(0 <= Head && Head < 64, + "RelExpr is too large for 64-bit mask!"); + return (uint64_t(1) << Head) | RelExprMaskBuilder<Tail...>::build(); + } +}; + +// Return true if `Expr` is one of `Exprs`. +// There are fewer than 64 RelExpr's, so we can represent any set of +// RelExpr's as a constant bit mask and test for membership with a +// couple cheap bitwise operations. +template <RelExpr... 
Exprs> bool isRelExprOneOf(RelExpr Expr) { + assert(0 <= Expr && (int)Expr < 64 && "RelExpr is too large for 64-bit mask!"); + return (uint64_t(1) << Expr) & RelExprMaskBuilder<Exprs...>::build(); +} + +// Architecture-neutral representation of relocation. +struct Relocation { + RelExpr Expr; + uint32_t Type; + uint64_t Offset; + uint64_t Addend; + SymbolBody *Sym; +}; + +template <class ELFT> void scanRelocations(InputSectionBase<ELFT> &); + +template <class ELFT> void createThunks(InputSectionBase<ELFT> &); + +// Returns the addend stored in a relocation record. This overload handles +// Elf_Rel records and always returns 0. +template <class ELFT> +static inline typename ELFT::uint getAddend(const typename ELFT::Rel &Rel) { + return 0; +} + +// Overload for Elf_Rela records: returns the record's r_addend field. +template <class ELFT> +static inline typename ELFT::uint getAddend(const typename ELFT::Rela &Rel) { + return Rel.r_addend; +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.cpp b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp new file mode 100644 index 000000000000..c740685a15a1 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp @@ -0,0 +1,200 @@ +//===- ScriptParser.cpp ---------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the base parser class for linker script and dynamic +// list. +// +//===----------------------------------------------------------------------===// + +#include "ScriptParser.h" +#include "Error.h" +#include "llvm/ADT/Twine.h" + +using namespace llvm; +using namespace lld; +using namespace lld::elf; + +// Returns a whole line containing the current token. 
+StringRef ScriptParserBase::getLine() { + StringRef S = getCurrentMB().getBuffer(); + StringRef Tok = Tokens[Pos - 1]; + + size_t Pos = S.rfind('\n', Tok.data() - S.data()); + if (Pos != StringRef::npos) + S = S.substr(Pos + 1); + return S.substr(0, S.find_first_of("\r\n")); +} + +// Returns 1-based line number of the current token. +size_t ScriptParserBase::getLineNumber() { + StringRef S = getCurrentMB().getBuffer(); + StringRef Tok = Tokens[Pos - 1]; + return S.substr(0, Tok.data() - S.data()).count('\n') + 1; +} + +// Returns 0-based column number of the current token. +size_t ScriptParserBase::getColumnNumber() { + StringRef Tok = Tokens[Pos - 1]; + return Tok.data() - getLine().data(); +} + +// Returns "<buffer identifier>:<line number>" for the current token, or just +// the buffer identifier if no token has been consumed yet (Pos == 0). +std::string ScriptParserBase::getCurrentLocation() { + std::string Filename = getCurrentMB().getBufferIdentifier(); + if (!Pos) + return Filename; + return (Filename + ":" + Twine(getLineNumber())).str(); +} + +ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) { tokenize(MB); } + +// We don't want to record cascading errors. Keep only the first one. +void ScriptParserBase::setError(const Twine &Msg) { + if (Error) + return; + Error = true; + + if (!Pos) { + error(getCurrentLocation() + ": " + Msg); + return; + } + + std::string S = getCurrentLocation() + ": "; + error(S + Msg); + error(S + getLine()); + error(S + std::string(getColumnNumber(), ' ') + "^"); +} + +// Split the contents of MB into linker script tokens and insert them into +// Tokens at the current position (Pos). MB is also recorded in MBs. On an +// unclosed quote an error is reported and no tokens are inserted. +void ScriptParserBase::tokenize(MemoryBufferRef MB) { + std::vector<StringRef> Vec; + MBs.push_back(MB); + StringRef S = MB.getBuffer(); + StringRef Begin = S; + + for (;;) { + S = skipSpace(S); + if (S.empty()) + break; + + // Quoted token. Note that double-quote characters are parts of a token + // because, in a glob match context, only unquoted tokens are interpreted + // as glob patterns. Double-quoted tokens are literal patterns in that + // context. 
+ if (S.startswith("\"")) { + size_t E = S.find("\"", 1); + if (E == StringRef::npos) { + StringRef Filename = MB.getBufferIdentifier(); + size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n'); + error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote"); + return; + } + + Vec.push_back(S.take_front(E + 1)); + S = S.substr(E + 1); + continue; + } + + // Unquoted token. This is more relaxed than tokens in C-like language, + // so that you can write "file-name.cpp" as one bare token, for example. + // NOTE: this loop-local Pos (a token length) shadows the member Pos. + size_t Pos = S.find_first_not_of( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$/\\~=+[]*?-:!<>^"); + + // A character that cannot start a word (which is usually a + // punctuation) forms a single character token. + if (Pos == 0) + Pos = 1; + Vec.push_back(S.substr(0, Pos)); + S = S.substr(Pos); + } + + // Here Pos is the member again: the new tokens are spliced in at the + // current position rather than appended. + Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end()); +} + +// Skip leading whitespace characters or comments. Comments are /* ... */ +// blocks and '#' up to the end of the line. On an unclosed block comment an +// error is reported and the empty string is returned. +StringRef ScriptParserBase::skipSpace(StringRef S) { + for (;;) { + if (S.startswith("/*")) { + size_t E = S.find("*/", 2); + if (E == StringRef::npos) { + error("unclosed comment in a linker script"); + return ""; + } + S = S.substr(E + 2); + continue; + } + if (S.startswith("#")) { + size_t E = S.find('\n', 1); + if (E == StringRef::npos) + E = S.size() - 1; + S = S.substr(E + 1); + continue; + } + size_t Size = S.size(); + S = S.ltrim(); + if (S.size() == Size) + return S; + } +} + +// An erroneous token is handled as if it were the last token before EOF. 
+bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; } + +StringRef ScriptParserBase::next() { + if (Error) + return ""; + if (atEOF()) { + setError("unexpected EOF"); + return ""; + } + return Tokens[Pos++]; +} + +StringRef ScriptParserBase::peek() { + StringRef Tok = next(); + if (Error) + return ""; + --Pos; + return Tok; +} + +bool ScriptParserBase::consume(StringRef Tok) { + if (peek() == Tok) { + skip(); + return true; + } + return false; +} + +void ScriptParserBase::skip() { (void)next(); } + +void ScriptParserBase::expect(StringRef Expect) { + if (Error) + return; + StringRef Tok = next(); + if (Tok != Expect) + setError(Expect + " expected, but got " + Tok); +} + +// Returns true if S encloses T, i.e. T's bytes lie entirely within S's byte +// range. This compares pointers, not string contents. +static bool encloses(StringRef S, StringRef T) { + return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end(); +} + +// Returns the buffer that holds the current token (Tokens[Pos - 1]), or the +// first buffer if no token has been consumed yet. Hits llvm_unreachable if no +// buffer in MBs encloses the token. +MemoryBufferRef ScriptParserBase::getCurrentMB() { + // Find input buffer containing the current token. + assert(!MBs.empty()); + if (!Pos) + return MBs[0]; + + for (MemoryBufferRef MB : MBs) + if (encloses(MB.getBuffer(), Tokens[Pos - 1])) + return MB; + llvm_unreachable("getCurrentMB: failed to find a token"); +} diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.h b/contrib/llvm/tools/lld/ELF/ScriptParser.h new file mode 100644 index 000000000000..264c49792337 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/ScriptParser.h @@ -0,0 +1,53 @@ +//===- ScriptParser.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SCRIPT_PARSER_H +#define LLD_ELF_SCRIPT_PARSER_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include <utility> +#include <vector> + +namespace lld { +namespace elf { + +// Base class for script parsers. It tokenizes one or more memory buffers into +// Tokens and provides next()/peek()/consume()/expect() over that token +// stream. Pos is the index of the next token to read. Error is set by +// setError(), which reports only the first error. +class ScriptParserBase { +public: + explicit ScriptParserBase(MemoryBufferRef MB); + + void setError(const Twine &Msg); + void tokenize(MemoryBufferRef MB); + static StringRef skipSpace(StringRef S); + bool atEOF(); + StringRef next(); + StringRef peek(); + void skip(); + bool consume(StringRef Tok); + void expect(StringRef Expect); + std::string getCurrentLocation(); + + std::vector<MemoryBufferRef> MBs; + std::vector<StringRef> Tokens; + size_t Pos = 0; + bool Error = false; + +private: + StringRef getLine(); + size_t getLineNumber(); + size_t getColumnNumber(); + + MemoryBufferRef getCurrentMB(); +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Strings.cpp b/contrib/llvm/tools/lld/ELF/Strings.cpp new file mode 100644 index 000000000000..ec3d1f1b2b51 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Strings.cpp @@ -0,0 +1,108 @@ +//===- Strings.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Strings.h" +#include "Config.h" +#include "Error.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Demangle/Demangle.h" +#include <algorithm> +#include <cstring> + +using namespace llvm; +using namespace lld; +using namespace lld::elf; + +// Compiles each string in Pat into a GlobPattern. A pattern that fails to +// compile is reported through error() and left out of the matcher. +StringMatcher::StringMatcher(ArrayRef<StringRef> Pat) { + for (StringRef S : Pat) { + Expected<GlobPattern> Pat = GlobPattern::create(S); + if (!Pat) + error(toString(Pat.takeError())); + else + Patterns.push_back(*Pat); + } +} + +// Returns true if S matches at least one of the stored patterns. +bool StringMatcher::match(StringRef S) const { + for (const GlobPattern &Pat : Patterns) + if (Pat.match(S)) + return true; + return false; +} + +// If an input string is in the form of "foo.N" where N is a number, +// return N. Otherwise, returns 65536, which is one greater than the +// lowest priority. +int elf::getPriority(StringRef S) { + size_t Pos = S.rfind('.'); + if (Pos == StringRef::npos) + return 65536; + int V; + if (S.substr(Pos + 1).getAsInteger(10, V)) + return 65536; + return V; +} + +// Returns true if S contains any of the characters '?', '*' or '['. +bool elf::hasWildcard(StringRef S) { + return S.find_first_of("?*[") != StringRef::npos; +} + +// If S starts with a double quote, returns S without its first and last +// characters; otherwise returns S unchanged. The last character is dropped +// without checking that it is a closing quote. +StringRef elf::unquote(StringRef S) { + if (!S.startswith("\"")) + return S; + return S.substr(1, S.size() - 2); +} + +// Converts a hex string (e.g. "deadbeef") to a vector. The string is consumed +// two characters at a time; if a group is not valid hexadecimal, an error is +// reported and an empty vector is returned. +std::vector<uint8_t> elf::parseHex(StringRef S) { + std::vector<uint8_t> Hex; + while (!S.empty()) { + StringRef B = S.substr(0, 2); + S = S.substr(2); + uint8_t H; + if (B.getAsInteger(16, H)) { + error("not a hexadecimal value: " + B); + return {}; + } + Hex.push_back(H); + } + return Hex; +} + +// Returns true for ASCII letters and for '_'. +static bool isAlpha(char C) { + return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_'; +} + +// Returns true for anything isAlpha() accepts and for ASCII digits. +static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); } + +// Returns true if S is valid as a C language identifier. 
+bool elf::isValidCIdentifier(StringRef S) { + return !S.empty() && isAlpha(S[0]) && + std::all_of(S.begin() + 1, S.end(), isAlnum); +} + +// Returns the demangled C++ symbol name for Name, or None if Name does not +// start with "_Z" or cannot be demangled. +Optional<std::string> elf::demangle(StringRef Name) { + // __cxa_demangle can be used to demangle strings other than symbol + // names which do not necessarily start with "_Z". Name can be + // either a C or C++ symbol. Don't call __cxa_demangle if the name + // does not look like a C++ symbol name to avoid getting unexpected + // result for a C symbol that happens to match a mangled type name. + // NOTE: the code below actually calls itaniumDemangle, not __cxa_demangle. + if (!Name.startswith("_Z")) + return None; + + char *Buf = itaniumDemangle(Name.str().c_str(), nullptr, nullptr, nullptr); + if (!Buf) + return None; + std::string S(Buf); + free(Buf); + return S; +} diff --git a/contrib/llvm/tools/lld/ELF/Strings.h b/contrib/llvm/tools/lld/ELF/Strings.h new file mode 100644 index 000000000000..934b6427105f --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Strings.h @@ -0,0 +1,82 @@ +//===- Strings.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_STRINGS_H +#define LLD_ELF_STRINGS_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/GlobPattern.h" +#include <vector> + +namespace lld { +namespace elf { + +int getPriority(StringRef S); +bool hasWildcard(StringRef S); +std::vector<uint8_t> parseHex(StringRef S); +bool isValidCIdentifier(StringRef S); +StringRef unquote(StringRef S); + +// This is a lazy version of StringRef. String size is computed lazily +// when it is needed. 
It is more efficient than StringRef to instantiate +// if you have a string whose size is unknown. +// +// ELF string tables contain a lot of null-terminated strings. +// Most of them are not necessary for the linker because they are names +// of local symbols and the linker doesn't use local symbol names for +// name resolution. So, we use this class to represent strings read +// from string tables. +class StringRefZ { +public: + StringRefZ() : Start(nullptr), Size(0) {} + StringRefZ(const char *S, size_t Size) : Start(S), Size(Size) {} + + /*implicit*/ StringRefZ(const char *S) : Start(S), Size(-1) {} + + /*implicit*/ StringRefZ(llvm::StringRef S) + : Start(S.data()), Size(S.size()) {} + + operator llvm::StringRef() const { + if (Size == (size_t)-1) + Size = strlen(Start); + return {Start, Size}; + } + +private: + const char *Start; + mutable size_t Size; +}; + +// This class represents multiple glob patterns. +class StringMatcher { +public: + StringMatcher() = default; + explicit StringMatcher(ArrayRef<StringRef> Pat); + + bool match(StringRef S) const; + +private: + std::vector<llvm::GlobPattern> Patterns; +}; + +// Returns a demangled C++ symbol name. If Name is not a mangled +// name, it returns Optional::None. +llvm::Optional<std::string> demangle(StringRef Name); + +// Returns a StringRef built from Arr's data pointer and size, i.e. a view of +// the same bytes as characters. +inline StringRef toStringRef(ArrayRef<uint8_t> Arr) { + return {(const char *)Arr.data(), Arr.size()}; +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp new file mode 100644 index 000000000000..f08fa6229c1a --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp @@ -0,0 +1,710 @@ +//===- SymbolTable.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Symbol table is a bag of all known symbols. We put all symbols of +// all input files to the symbol table. The symbol table is basically +// a hash table with the logic to resolve symbol name conflicts using +// the symbol types. +// +//===----------------------------------------------------------------------===// + +#include "SymbolTable.h" +#include "Config.h" +#include "Error.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "Symbols.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elf; + +// All input object files must be for the same architecture +// (e.g. it does not make sense to link x86 object files with +// MIPS object files.) This function checks for that error. +// Returns true if F is compatible; otherwise reports an error and returns +// false. Files that are neither ELF nor bitcode are always accepted. For +// MIPS, the file's N32 ABI flag must also match Config->MipsN32Abi. +template <class ELFT> static bool isCompatible(InputFile *F) { + if (!isa<ELFFileBase<ELFT>>(F) && !isa<BitcodeFile>(F)) + return true; + + if (F->EKind == Config->EKind && F->EMachine == Config->EMachine) { + if (Config->EMachine != EM_MIPS) + return true; + if (isMipsN32Abi(F) == Config->MipsN32Abi) + return true; + } + + if (!Config->Emulation.empty()) + error(toString(F) + " is incompatible with " + Config->Emulation); + else + error(toString(F) + " is incompatible with " + toString(Config->FirstElf)); + return false; +} + +// Add symbols in File to the symbol table. 
// The file is dispatched on its kind: binary, archive and lazy object files
// are parsed and return early; shared, bitcode and regular object files are
// also recorded in their per-kind list. Incompatible files are dropped
// (isCompatible has already reported the error).
template <class ELFT> void SymbolTable<ELFT>::addFile(InputFile *File) {
  if (!isCompatible<ELFT>(File))
    return;

  // Binary file
  if (auto *F = dyn_cast<BinaryFile>(File)) {
    BinaryFiles.push_back(F);
    F->parse<ELFT>();
    return;
  }

  // .a file
  if (auto *F = dyn_cast<ArchiveFile>(File)) {
    F->parse<ELFT>();
    return;
  }

  // Lazy object file
  if (auto *F = dyn_cast<LazyObjectFile>(File)) {
    F->parse<ELFT>();
    return;
  }

  // Tracing output is only printed for the file kinds handled below.
  if (Config->Trace)
    outs() << toString(File) << "\n";

  // .so file
  if (auto *F = dyn_cast<SharedFile<ELFT>>(File)) {
    // DSOs are uniquified not by filename but by soname.
    F->parseSoName();
    // Stop on any earlier error, or if this soname was already added.
    if (ErrorCount || !SoNames.insert(F->getSoName()).second)
      return;
    SharedFiles.push_back(F);
    F->parseRest();
    return;
  }

  // LLVM bitcode file
  if (auto *F = dyn_cast<BitcodeFile>(File)) {
    BitcodeFiles.push_back(F);
    F->parse<ELFT>(ComdatGroups);
    return;
  }

  // Regular object file
  auto *F = cast<ObjectFile<ELFT>>(File);
  ObjectFiles.push_back(F);
  F->parse(ComdatGroups);
}

// This function is where all the optimizations of link-time
// optimization happens. When LTO is in use, some input files are
// not in native object file format but in the LLVM bitcode format.
// This function compiles bitcode files into a few big native files
// using LLVM functions and replaces bitcode symbols with the results.
// Because all bitcode files that consist of a program are passed
// to the compiler at once, it can do whole-program optimization.
template <class ELFT> void SymbolTable<ELFT>::addCombinedLTOObject() {
  if (BitcodeFiles.empty())
    return;

  // Compile bitcode files and replace bitcode symbols.
  LTO.reset(new BitcodeCompiler);
  for (BitcodeFile *F : BitcodeFiles)
    LTO->add<ELFT>(*F);

  for (InputFile *File : LTO->compile()) {
    ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(File);
    // The compiled objects are parsed against a fresh, empty comdat set
    // rather than the table's ComdatGroups.
    DenseSet<CachedHashStringRef> DummyGroups;
    Obj->parse(DummyGroups);
    ObjectFiles.push_back(Obj);
  }
}

// Define Name as an absolute symbol: a regular STT_NOTYPE symbol with value 0,
// size 0, no section and no file. Returns the resulting symbol body.
template <class ELFT>
DefinedRegular<ELFT> *SymbolTable<ELFT>::addAbsolute(StringRef Name,
                                                     uint8_t Visibility,
                                                     uint8_t Binding) {
  Symbol *Sym =
      addRegular(Name, Visibility, STT_NOTYPE, 0, 0, Binding, nullptr, nullptr);
  return cast<DefinedRegular<ELFT>>(Sym->body());
}

// Add Name as an "ignored" symbol. An ignored symbol is a regular
// linker-synthesized defined symbol, but is only defined if needed.
// Returns nullptr when Name is not in the table or is not currently undefined.
template <class ELFT>
DefinedRegular<ELFT> *SymbolTable<ELFT>::addIgnored(StringRef Name,
                                                    uint8_t Visibility) {
  SymbolBody *S = find(Name);
  if (!S || !S->isUndefined())
    return nullptr;
  return addAbsolute(Name, Visibility);
}

// Set a flag for --trace-symbol so that we can print out a log message
// if a new symbol with the same name is inserted into the symbol table.
template <class ELFT> void SymbolTable<ELFT>::trace(StringRef Name) {
  // Index -1 marks a placeholder: no Symbol exists yet, only the Traced flag.
  // insert() and find() both special-case it.
  Symtab.insert({CachedHashStringRef(Name), {-1, true}});
}

// Rename SYM as __wrap_SYM. The original symbol is preserved as __real_SYM.
// Used to implement --wrap. Does nothing if Name is not in the symbol table.
template <class ELFT> void SymbolTable<ELFT>::wrap(StringRef Name) {
  SymbolBody *B = find(Name);
  if (!B)
    return;
  Symbol *Sym = B->symbol();
  Symbol *Real = addUndefined(Saver.save("__real_" + Name));
  Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name));

  // We rename symbols by replacing the old symbol's SymbolBody with the new
  // symbol's SymbolBody. This causes all SymbolBody pointers referring to the
  // old symbol to instead refer to the new symbol.
  memcpy(Real->Body.buffer, Sym->Body.buffer, sizeof(Sym->Body));
  memcpy(Sym->Body.buffer, Wrap->Body.buffer, sizeof(Wrap->Body));
}

// Combine two visibilities. STV_DEFAULT yields to the other value; otherwise
// the numerically smaller one is returned.
static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
  if (VA == STV_DEFAULT)
    return VB;
  if (VB == STV_DEFAULT)
    return VA;
  return std::min(VA, VB);
}

// Find an existing symbol or create and insert a new one.
// Returns the symbol and whether it was newly created.
template <class ELFT>
std::pair<Symbol *, bool> SymbolTable<ELFT>::insert(StringRef Name) {
  auto P = Symtab.insert(
      {CachedHashStringRef(Name), SymIndex((int)SymVector.size(), false)});
  SymIndex &V = P.first->second;
  bool IsNew = P.second;

  // The name was only registered by trace(): treat it as new, give it a real
  // index, and keep it marked as traced.
  if (V.Idx == -1) {
    IsNew = true;
    V = SymIndex((int)SymVector.size(), true);
  }

  Symbol *Sym;
  if (IsNew) {
    // A fresh symbol starts out weak, default-visibility, unused and
    // unexported, with the configured default version.
    Sym = new (BAlloc) Symbol;
    Sym->InVersionScript = false;
    Sym->Binding = STB_WEAK;
    Sym->Visibility = STV_DEFAULT;
    Sym->IsUsedInRegularObj = false;
    Sym->ExportDynamic = false;
    Sym->Traced = V.Traced;
    Sym->VersionId = Config->DefaultSymbolVersion;
    SymVector.push_back(Sym);
  } else {
    Sym = SymVector[V.Idx];
  }
  return {Sym, IsNew};
}

// Construct a string in the form of "Sym in File1 and File2".
// Used to construct an error message.
static std::string conflictMsg(SymbolBody *Existing, InputFile *NewFile) {
  return "'" + toString(*Existing) + "' in " + toString(Existing->File) +
         " and " + toString(NewFile);
}

// Find an existing symbol or create and insert a new one, then apply the given
// attributes.
template <class ELFT>
std::pair<Symbol *, bool>
SymbolTable<ELFT>::insert(StringRef Name, uint8_t Type, uint8_t Visibility,
                          bool CanOmitFromDynSym, InputFile *File) {
  // A null File is treated the same as a regular object file here.
  bool IsUsedInRegularObj = !File || File->kind() == InputFile::ObjectKind;
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name);

  // Merge in the new symbol's visibility.
  S->Visibility = getMinVisibility(S->Visibility, Visibility);
  if (!CanOmitFromDynSym && (Config->Shared || Config->ExportDynamic))
    S->ExportDynamic = true;
  if (IsUsedInRegularObj)
    S->IsUsedInRegularObj = true;
  // An existing symbol of known type must agree with the new one on whether
  // it is thread-local.
  if (!WasInserted && S->body()->Type != SymbolBody::UnknownType &&
      ((Type == STT_TLS) != S->body()->isTls()))
    error("TLS attribute mismatch for symbol " + conflictMsg(S->body(), File));

  return {S, WasInserted};
}

// Convenience overload: add a global, default-visibility undefined symbol
// with type 0 and no file.
template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) {
  return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT,
                      /*Type*/ 0,
                      /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
}

// The visibility is taken from the low two bits of st_other.
static uint8_t getVisibility(uint8_t StOther) { return StOther & 3; }

// Record a reference to Name. A new name becomes an Undefined symbol. For an
// existing name, a non-weak reference updates the binding of shared and lazy
// symbols and marks a shared symbol's file as used; a lazy symbol is either
// fetched (non-weak) or just has its type updated (weak).
template <class ELFT>
Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, bool IsLocal,
                                        uint8_t Binding, uint8_t StOther,
                                        uint8_t Type, bool CanOmitFromDynSym,
                                        InputFile *File) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) =
      insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, File);
  if (WasInserted) {
    S->Binding = Binding;
    replaceBody<Undefined<ELFT>>(S, Name, IsLocal, StOther, Type, File);
    return S;
  }
  if (Binding != STB_WEAK) {
    if (S->body()->isShared() || S->body()->isLazy())
      S->Binding = Binding;
    if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(S->body()))
      SS->file()->IsUsed = true;
  }
  if (auto *L = dyn_cast<Lazy>(S->body())) {
    // An undefined weak will not fetch archive members, but we have to remember
    // its type. See also comment in addLazyArchive.
    if (S->isWeak())
      L->Type = Type;
    else if (InputFile *F = L->fetch())
      addFile(F);
  }
  return S;
}

// We have a new defined symbol with the specified binding. Return 1 if the new
// symbol should win, -1 if the new symbol should lose, or 0 if both symbols are
// strong defined symbols.
static int compareDefined(Symbol *S, bool WasInserted, uint8_t Binding) {
  if (WasInserted)
    return 1;
  SymbolBody *Body = S->body();
  // Any definition beats a lazy, undefined or shared symbol.
  if (Body->isLazy() || Body->isUndefined() || Body->isShared())
    return 1;
  // The existing symbol is defined: a weak newcomer loses, and a strong
  // newcomer beats a weak existing symbol.
  if (Binding == STB_WEAK)
    return -1;
  if (S->isWeak())
    return 1;
  return 0;
}

// We have a new non-common defined symbol with the specified binding. Return 1
// if the new symbol should win, -1 if the new symbol should lose, or 0 if there
// is a conflict. If the new symbol wins, also update the binding.
template <typename ELFT>
static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding,
                                   bool IsAbsolute, typename ELFT::uint Value) {
  if (int Cmp = compareDefined(S, WasInserted, Binding)) {
    if (Cmp > 0)
      S->Binding = Binding;
    return Cmp;
  }
  SymbolBody *B = S->body();
  if (isa<DefinedCommon>(B)) {
    // Non-common symbols take precedence over common symbols.
    if (Config->WarnCommon)
      warn("common " + S->body()->getName() + " is overridden");
    return 1;
  } else if (auto *R = dyn_cast<DefinedRegular<ELFT>>(B)) {
    // Two global absolute definitions with the same value are not a conflict;
    // the existing one is simply kept.
    if (R->Section == nullptr && Binding == STB_GLOBAL && IsAbsolute &&
        R->Value == Value)
      return -1;
  }
  return 0;
}

// Add a common symbol. When it meets another common symbol, the alignment
// becomes the maximum of the two, and the body is replaced only if the new
// size is larger. A non-common definition already in the table is kept.
template <class ELFT>
Symbol *SymbolTable<ELFT>::addCommon(StringRef N, uint64_t Size,
                                     uint64_t Alignment, uint8_t Binding,
                                     uint8_t StOther, uint8_t Type,
                                     InputFile *File) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(N, Type, getVisibility(StOther),
                                    /*CanOmitFromDynSym*/ false, File);
  int Cmp = compareDefined(S, WasInserted, Binding);
  if (Cmp > 0) {
    S->Binding = Binding;
    replaceBody<DefinedCommon>(S, N, Size, Alignment, StOther, Type, File);
  } else if (Cmp == 0) {
    auto *C = dyn_cast<DefinedCommon>(S->body());
    if (!C) {
      // Non-common symbols take precedence over common symbols.
      if (Config->WarnCommon)
        warn("common " + S->body()->getName() + " is overridden");
      return S;
    }

    if (Config->WarnCommon)
      warn("multiple common of " + S->body()->getName());

    Alignment = C->Alignment = std::max(C->Alignment, Alignment);
    if (Size > C->Size)
      replaceBody<DefinedCommon>(S, N, Size, Alignment, StOther, Type, File);
  }
  return S;
}

// Report Msg as a warning if multiple definitions are allowed, and as an
// error otherwise.
static void print(const Twine &Msg) {
  if (Config->AllowMultipleDefinition)
    warn(Msg);
  else
    error(Msg);
}

// Report a duplicate definition, naming only the two files involved.
static void reportDuplicate(SymbolBody *Existing, InputFile *NewFile) {
  print("duplicate symbol " + conflictMsg(Existing, NewFile));
}

// Report a duplicate definition with the locations of both definitions.
// Falls back to the file-only form when the existing symbol is not a regular
// symbol with a section, or when the new definition has no section.
template <class ELFT>
static void reportDuplicate(SymbolBody *Existing,
                            InputSectionBase<ELFT> *ErrSec,
                            typename ELFT::uint ErrOffset) {
  DefinedRegular<ELFT> *D = dyn_cast<DefinedRegular<ELFT>>(Existing);
  if (!D || !D->Section || !ErrSec) {
    reportDuplicate(Existing, ErrSec ? ErrSec->getFile() : nullptr);
    return;
  }

  std::string OldLoc = D->Section->getLocation(D->Value);
  std::string NewLoc = ErrSec->getLocation(ErrOffset);

  print(NewLoc + ": duplicate symbol '" + toString(*Existing) + "'");
  print(OldLoc + ": previous definition was here");
}

// Add a regular defined symbol. A null Section makes it an absolute symbol
// for the purpose of conflict resolution. A conflict is reported as a
// duplicate symbol.
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t StOther,
                                      uint8_t Type, uintX_t Value, uintX_t Size,
                                      uint8_t Binding,
                                      InputSectionBase<ELFT> *Section,
                                      InputFile *File) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name, Type, getVisibility(StOther),
                                    /*CanOmitFromDynSym*/ false, File);
  int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, Binding,
                                          Section == nullptr, Value);
  if (Cmp > 0)
    replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
                                      Value, Size, Section, File);
  else if (Cmp == 0)
    reportDuplicate(S->body(), Section, Value);
  return S;
}

// Add a symbol defined relative to an output section. It is resolved as a
// global, non-absolute, STT_NOTYPE definition with no file.
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addSynthetic(StringRef N,
                                        const OutputSectionBase *Section,
                                        uintX_t Value, uint8_t StOther) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(N, STT_NOTYPE, getVisibility(StOther),
                                    /*CanOmitFromDynSym*/ false, nullptr);
  int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, STB_GLOBAL,
                                          /*IsAbsolute*/ false, /*Value*/ 0);
  if (Cmp > 0)
    replaceBody<DefinedSynthetic>(S, N, Value, Section);
  else if (Cmp == 0)
    reportDuplicate(S->body(), nullptr);
  return S;
}

// Add a symbol defined by shared object F. It only becomes the symbol's body
// if the name is new or currently undefined.
template <typename ELFT>
void SymbolTable<ELFT>::addShared(SharedFile<ELFT> *F, StringRef Name,
                                  const Elf_Sym &Sym,
                                  const typename ELFT::Verdef *Verdef) {
  // DSO symbols do not affect visibility in the output, so we pass STV_DEFAULT
  // as the visibility, which will leave the visibility in the symbol table
  // unchanged.
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) =
      insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true, F);
  // Make sure we preempt DSO symbols with default visibility.
  if (Sym.getVisibility() == STV_DEFAULT) {
    S->ExportDynamic = true;
    // Exporting preempting symbols takes precedence over linker scripts.
    if (S->VersionId == VER_NDX_LOCAL)
      S->VersionId = VER_NDX_GLOBAL;
  }
  if (WasInserted || isa<Undefined<ELFT>>(S->body())) {
    replaceBody<SharedSymbol<ELFT>>(S, F, Name, Sym, Verdef);
    // A non-weak symbol resolved from F marks F as used.
    if (!S->isWeak())
      F->IsUsed = true;
  }
}

// Add a symbol defined in a bitcode file. If it wins, it is stored as a
// DefinedRegular with value 0, size 0 and no section.
template <class ELFT>
Symbol *SymbolTable<ELFT>::addBitcode(StringRef Name, uint8_t Binding,
                                      uint8_t StOther, uint8_t Type,
                                      bool CanOmitFromDynSym, BitcodeFile *F) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) =
      insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, F);
  int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, Binding,
                                          /*IsAbs*/ false, /*Value*/ 0);
  if (Cmp > 0)
    replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
                                      0, 0, nullptr, F);
  else if (Cmp == 0)
    reportDuplicate(S->body(), F);
  return S;
}

// Look up Name. Returns nullptr if the name is unknown or is only a
// --trace-symbol placeholder (index -1).
template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) {
  auto It = Symtab.find(CachedHashStringRef(Name));
  if (It == Symtab.end())
    return nullptr;
  SymIndex V = It->second;
  if (V.Idx == -1)
    return nullptr;
  return SymVector[V.Idx]->body();
}

// Add a symbol that archive F can provide. A new name becomes a LazyArchive
// symbol; an existing strong undefined symbol causes the archive member to be
// loaded immediately; any other existing symbol is left alone.
template <class ELFT>
void SymbolTable<ELFT>::addLazyArchive(ArchiveFile *F,
                                       const object::Archive::Symbol Sym) {
  Symbol *S;
  bool WasInserted;
  StringRef Name = Sym.getName();
  std::tie(S, WasInserted) = insert(Name);
  if (WasInserted) {
    replaceBody<LazyArchive>(S, *F, Sym, SymbolBody::UnknownType);
    return;
  }
  if (!S->body()->isUndefined())
    return;

  // Weak undefined symbols should not fetch members from archives. If we were
  // to keep old symbol we would not know that an archive member was available
  // if a strong undefined symbol shows up afterwards in the link. If a strong
  // undefined symbol never shows up, this lazy symbol will get to the end of
  // the link and must be treated as the weak undefined one. We already marked
  // this symbol as used when we added it to the symbol table, but we also need
  // to preserve its type. FIXME: Move the Type field to Symbol.
  if (S->isWeak()) {
    replaceBody<LazyArchive>(S, *F, Sym, S->body()->Type);
    return;
  }
  std::pair<MemoryBufferRef, uint64_t> MBInfo = F->getMember(&Sym);
  // An empty buffer means there is nothing to load for this member.
  if (!MBInfo.first.getBuffer().empty())
    addFile(createObjectFile(MBInfo.first, F->getName(), MBInfo.second));
}

// Same as addLazyArchive, but for a symbol provided by a lazy object file.
template <class ELFT>
void SymbolTable<ELFT>::addLazyObject(StringRef Name, LazyObjectFile &Obj) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name);
  if (WasInserted) {
    replaceBody<LazyObject>(S, Name, Obj, SymbolBody::UnknownType);
    return;
  }
  if (!S->body()->isUndefined())
    return;

  // See comment for addLazyArchive above.
  if (S->isWeak()) {
    replaceBody<LazyObject>(S, Name, Obj, S->body()->Type);
  } else {
    MemoryBufferRef MBRef = Obj.getBuffer();
    if (!MBRef.getBuffer().empty())
      addFile(createObjectFile(MBRef));
  }
}

// Process undefined (-u) flags by loading lazy symbols named by those flags.
template <class ELFT> void SymbolTable<ELFT>::scanUndefinedFlags() {
  for (StringRef S : Config->Undefined)
    if (auto *L = dyn_cast_or_null<Lazy>(find(S)))
      if (InputFile *File = L->fetch())
        addFile(File);
}

// This function takes care of the case in which shared libraries depend on
// the user program (not the other way, which is usual). Shared libraries
// may have undefined symbols, expecting that the user program provides
// the definitions for them. An example is BSD's __progname symbol.
// We need to put such symbols to the main program's .dynsym so that
// shared libraries can find them.
// Except this, we ignore undefined symbols in DSOs.
template <class ELFT> void SymbolTable<ELFT>::scanShlibUndefined() {
  for (SharedFile<ELFT> *File : SharedFiles)
    for (StringRef U : File->getUndefinedSymbols())
      if (SymbolBody *Sym = find(U))
        if (Sym->isDefined())
          Sym->symbol()->ExportDynamic = true;
}

// Initialize DemangledSyms with a map from demangled symbols to symbol
// objects.
Used to handle "extern C++" directive in version scripts. +// +// The map will contain all demangled symbols. That can be very large, +// and in LLD we generally want to avoid do anything for each symbol. +// Then, why are we doing this? Here's why. +// +// Users can use "extern C++ {}" directive to match against demangled +// C++ symbols. For example, you can write a pattern such as +// "llvm::*::foo(int, ?)". Obviously, there's no way to handle this +// other than trying to match a pattern against all demangled symbols. +// So, if "extern C++" feature is used, we need to demangle all known +// symbols. +template <class ELFT> +StringMap<std::vector<SymbolBody *>> &SymbolTable<ELFT>::getDemangledSyms() { + if (!DemangledSyms) { + DemangledSyms.emplace(); + for (Symbol *Sym : SymVector) { + SymbolBody *B = Sym->body(); + if (B->isUndefined()) + continue; + if (Optional<std::string> S = demangle(B->getName())) + (*DemangledSyms)[*S].push_back(B); + else + (*DemangledSyms)[B->getName()].push_back(B); + } + } + return *DemangledSyms; +} + +template <class ELFT> +std::vector<SymbolBody *> SymbolTable<ELFT>::findByVersion(SymbolVersion Ver) { + if (Ver.IsExternCpp) + return getDemangledSyms().lookup(Ver.Name); + if (SymbolBody *B = find(Ver.Name)) + if (!B->isUndefined()) + return {B}; + return {}; +} + +template <class ELFT> +std::vector<SymbolBody *> +SymbolTable<ELFT>::findAllByVersion(SymbolVersion Ver) { + std::vector<SymbolBody *> Res; + StringMatcher M(Ver.Name); + + if (Ver.IsExternCpp) { + for (auto &P : getDemangledSyms()) + if (M.match(P.first())) + Res.insert(Res.end(), P.second.begin(), P.second.end()); + return Res; + } + + for (Symbol *Sym : SymVector) { + SymbolBody *B = Sym->body(); + if (!B->isUndefined() && M.match(B->getName())) + Res.push_back(B); + } + return Res; +} + +// If there's only one anonymous version definition in a version +// script file, the script does not actually define any symbol version, +// but just specifies symbols visibilities. 
We assume that the script was +// in the form of { global: foo; bar; local *; }. So, local is default. +// In this function, we make specified symbols global. +template <class ELFT> void SymbolTable<ELFT>::handleAnonymousVersion() { + for (SymbolVersion &Ver : Config->VersionScriptGlobals) { + if (Ver.HasWildcard) { + for (SymbolBody *B : findAllByVersion(Ver)) + B->symbol()->VersionId = VER_NDX_GLOBAL; + continue; + } + for (SymbolBody *B : findByVersion(Ver)) + B->symbol()->VersionId = VER_NDX_GLOBAL; + } +} + +// Set symbol versions to symbols. This function handles patterns +// containing no wildcard characters. +template <class ELFT> +void SymbolTable<ELFT>::assignExactVersion(SymbolVersion Ver, uint16_t VersionId, + StringRef VersionName) { + if (Ver.HasWildcard) + return; + + // Get a list of symbols which we need to assign the version to. + std::vector<SymbolBody *> Syms = findByVersion(Ver); + if (Syms.empty()) { + if (Config->NoUndefinedVersion) + error("version script assignment of '" + VersionName + "' to symbol '" + + Ver.Name + "' failed: symbol not defined"); + return; + } + + // Assign the version. + for (SymbolBody *B : Syms) { + Symbol *Sym = B->symbol(); + if (Sym->InVersionScript) + warn("duplicate symbol '" + Ver.Name + "' in version script"); + Sym->VersionId = VersionId; + Sym->InVersionScript = true; + } +} + +template <class ELFT> +void SymbolTable<ELFT>::assignWildcardVersion(SymbolVersion Ver, + uint16_t VersionId) { + if (!Ver.HasWildcard) + return; + std::vector<SymbolBody *> Syms = findAllByVersion(Ver); + + // Exact matching takes precendence over fuzzy matching, + // so we set a version to a symbol only if no version has been assigned + // to the symbol. This behavior is compatible with GNU. + for (SymbolBody *B : Syms) + if (B->symbol()->VersionId == Config->DefaultSymbolVersion) + B->symbol()->VersionId = VersionId; +} + +// This function processes version scripts by updating VersionId +// member of symbols. 
template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() {
  // Symbol themselves might know their versions because symbols
  // can contain versions in the form of <name>@<version>.
  // Let them parse their names.
  if (!Config->VersionDefinitions.empty())
    for (Symbol *Sym : SymVector)
      Sym->body()->parseSymbolVersion();

  // Handle edge cases first.
  // A non-empty VersionScriptGlobals list is handled as an anonymous version
  // and nothing else is processed.
  if (!Config->VersionScriptGlobals.empty()) {
    handleAnonymousVersion();
    return;
  }

  if (Config->VersionDefinitions.empty())
    return;

  // Now we have version definitions, so we need to set version ids to symbols.
  // Each version definition has a glob pattern, and all symbols that match
  // with the pattern get that version.

  // First, we assign versions to exact matching symbols,
  // i.e. version definitions not containing any glob meta-characters.
  for (SymbolVersion &Ver : Config->VersionScriptLocals)
    assignExactVersion(Ver, VER_NDX_LOCAL, "local");
  for (VersionDefinition &V : Config->VersionDefinitions)
    for (SymbolVersion &Ver : V.Globals)
      assignExactVersion(Ver, V.Id, V.Name);

  // Next, we assign versions to fuzzy matching symbols,
  // i.e. version definitions containing glob meta-characters.
  // Note that because the last match takes precedence over previous matches,
  // we iterate over the definitions in the reverse order.
  for (SymbolVersion &Ver : Config->VersionScriptLocals)
    assignWildcardVersion(Ver, VER_NDX_LOCAL);
  for (VersionDefinition &V : llvm::reverse(Config->VersionDefinitions))
    for (SymbolVersion &Ver : V.Globals)
      assignWildcardVersion(Ver, V.Id);
}

// Explicit instantiations for the four supported ELF flavors.
template class elf::SymbolTable<ELF32LE>;
template class elf::SymbolTable<ELF32BE>;
template class elf::SymbolTable<ELF64LE>;
template class elf::SymbolTable<ELF64BE>;
diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.h b/contrib/llvm/tools/lld/ELF/SymbolTable.h
new file mode 100644
index 000000000000..1e5a335acc16
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/SymbolTable.h
@@ -0,0 +1,151 @@
//===- SymbolTable.h --------------------------------------------*- C++ -*-===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_SYMBOL_TABLE_H
#define LLD_ELF_SYMBOL_TABLE_H

#include "InputFiles.h"
#include "LTO.h"
#include "Strings.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h"

namespace lld {
namespace elf {
class Lazy;
class OutputSectionBase;
struct Symbol;

// SymbolTable is a bucket of all known symbols, including defined,
// undefined, or lazy symbols (the last one is symbols in archive
// files whose archive members are not yet loaded).
//
// We put all symbols of all files to a SymbolTable, and the
// SymbolTable selects the "best" symbols if there are name
// conflicts. For example, obviously, a defined symbol is better than
// an undefined symbol. Or, if there's a conflict between a lazy and an
// undefined, it'll read an archive member to read a real definition
// to replace the lazy symbol. The logic is implemented in the
// add*() functions, which are called by input files as they are parsed. There
// is one add* function per symbol type.
template <class ELFT> class SymbolTable {
  typedef typename ELFT::Sym Elf_Sym;
  typedef typename ELFT::uint uintX_t;

public:
  // Add an input file of any kind to the link.
  void addFile(InputFile *File);
  // Compile all bitcode files added so far and add the resulting objects.
  void addCombinedLTOObject();

  // Read-only views of the symbols and of the files added so far.
  ArrayRef<Symbol *> getSymbols() const { return SymVector; }
  ArrayRef<ObjectFile<ELFT> *> getObjectFiles() const { return ObjectFiles; }
  ArrayRef<BinaryFile *> getBinaryFiles() const { return BinaryFiles; }
  ArrayRef<SharedFile<ELFT> *> getSharedFiles() const { return SharedFiles; }

  // Both default to hidden visibility.
  DefinedRegular<ELFT> *addAbsolute(StringRef Name,
                                    uint8_t Visibility = llvm::ELF::STV_HIDDEN,
                                    uint8_t Binding = llvm::ELF::STB_GLOBAL);
  DefinedRegular<ELFT> *addIgnored(StringRef Name,
                                   uint8_t Visibility = llvm::ELF::STV_HIDDEN);

  Symbol *addUndefined(StringRef Name);
  Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding,
                       uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym,
                       InputFile *File);

  Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type,
                     uintX_t Value, uintX_t Size, uint8_t Binding,
                     InputSectionBase<ELFT> *Section, InputFile *File);

  Symbol *addSynthetic(StringRef N, const OutputSectionBase *Section,
                       uintX_t Value, uint8_t StOther);

  void addShared(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
                 const typename ELFT::Verdef *Verdef);

  void addLazyArchive(ArchiveFile *F, const llvm::object::Archive::Symbol S);
  void addLazyObject(StringRef Name, LazyObjectFile &Obj);
  Symbol *addBitcode(StringRef Name, uint8_t Binding, uint8_t StOther,
                     uint8_t Type, bool CanOmitFromDynSym, BitcodeFile *File);

  Symbol *addCommon(StringRef N, uint64_t Size, uint64_t Alignment,
                    uint8_t Binding, uint8_t StOther, uint8_t Type,
                    InputFile *File);

  void scanUndefinedFlags();
  void scanShlibUndefined();
  void scanVersionScript();

  // Returns nullptr if Name is not in the table.
  SymbolBody *find(StringRef Name);

  void trace(StringRef Name);
  void wrap(StringRef Name);

  std::vector<InputSectionBase<ELFT> *> Sections;

private:
  // Both return the symbol for Name and whether it was newly created.
  std::pair<Symbol *, bool> insert(StringRef Name);
  std::pair<Symbol *, bool> insert(StringRef Name, uint8_t Type,
                                   uint8_t Visibility, bool CanOmitFromDynSym,
                                   InputFile *File);

  std::vector<SymbolBody *> findByVersion(SymbolVersion Ver);
  std::vector<SymbolBody *> findAllByVersion(SymbolVersion Ver);

  llvm::StringMap<std::vector<SymbolBody *>> &getDemangledSyms();
  void handleAnonymousVersion();
  void assignExactVersion(SymbolVersion Ver, uint16_t VersionId,
                          StringRef VersionName);
  void assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId);

  // Value type of Symtab: an index into SymVector packed with the
  // --trace-symbol flag into 32 bits.
  struct SymIndex {
    SymIndex(int Idx, bool Traced) : Idx(Idx), Traced(Traced) {}
    int Idx : 31;
    unsigned Traced : 1;
  };

  // The order the global symbols are in is not defined. We can use an arbitrary
  // order, but it has to be reproducible. That is true even when cross linking.
  // The default hashing of StringRef produces different results on 32 and 64
  // bit systems so we use a map to a vector. That is arbitrary, deterministic
  // but a bit inefficient.
  // FIXME: Experiment with passing in a custom hashing or sorting the symbols
  // once symbol resolution is finished.
  llvm::DenseMap<llvm::CachedHashStringRef, SymIndex> Symtab;
  std::vector<Symbol *> SymVector;

  // Comdat groups define "link once" sections. If two comdat groups have the
  // same name, only one of them is linked, and the other is ignored. This set
  // is used to uniquify them.
  llvm::DenseSet<llvm::CachedHashStringRef> ComdatGroups;

  std::vector<ObjectFile<ELFT> *> ObjectFiles;
  std::vector<SharedFile<ELFT> *> SharedFiles;
  std::vector<BitcodeFile *> BitcodeFiles;
  std::vector<BinaryFile *> BinaryFiles;

  // Set of .so files to not link the same shared object file more than once.
  llvm::DenseSet<StringRef> SoNames;

  // A map from demangled symbol names to their symbol objects.
  // This mapping is 1:N because two symbols with different versions
  // can have the same name. We use this map to handle "extern C++ {}"
  // directive in version scripts.
  llvm::Optional<llvm::StringMap<std::vector<SymbolBody *>>> DemangledSyms;

  // For LTO.
  std::unique_ptr<BitcodeCompiler> LTO;
};

// Holder for one SymbolTable pointer per ELF flavor.
template <class ELFT> struct Symtab { static SymbolTable<ELFT> *X; };
template <class ELFT> SymbolTable<ELFT> *Symtab<ELFT>::X;

} // namespace elf
} // namespace lld

#endif
diff --git a/contrib/llvm/tools/lld/ELF/Symbols.cpp b/contrib/llvm/tools/lld/ELF/Symbols.cpp
new file mode 100644
index 000000000000..f3edafaf4b78
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Symbols.cpp
@@ -0,0 +1,372 @@
//===- Symbols.cpp --------------------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "Symbols.h"
#include "Error.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "Strings.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Writer.h"

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Path.h"
#include <cstring>

using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;

using namespace lld;
using namespace lld::elf;

// Compute the address of Body without adding Addend. The one exception is a
// section symbol: there the addend is folded into the section offset before
// the offset is translated, and Addend is reset to 0 so that the caller does
// not add it a second time.
template <class ELFT>
static typename ELFT::uint getSymVA(const SymbolBody &Body,
                                    typename ELFT::uint &Addend) {
  typedef typename ELFT::uint uintX_t;

  switch (Body.kind()) {
  case SymbolBody::DefinedSyntheticKind: {
    auto &D = cast<DefinedSynthetic>(Body);
    const OutputSectionBase *Sec = D.Section;
    if (!Sec)
      return D.Value;
    // A value of -1 stands for the end of the output section.
    if (D.Value == uintX_t(-1))
      return Sec->Addr + Sec->Size;
    return Sec->Addr + D.Value;
  }
  case SymbolBody::DefinedRegularKind: {
    auto &D = cast<DefinedRegular<ELFT>>(Body);
    InputSectionBase<ELFT> *IS = D.Section;

    // According to the ELF spec reference to a local symbol from outside
    // the group are not allowed. Unfortunately .eh_frame breaks that rule
    // and must be treated specially. For now we just replace the symbol with
    // 0.
    if (IS == &InputSection<ELFT>::Discarded)
      return 0;

    // This is an absolute symbol.
    if (!IS)
      return D.Value;

    uintX_t Offset = D.Value;
    if (D.isSection()) {
      Offset += Addend;
      Addend = 0;
    }
    uintX_t VA = (IS->OutSec ? IS->OutSec->Addr : 0) + IS->getOffset(Offset);
    // Unless producing relocatable output, a TLS symbol's value is its
    // offset from the start of the TLS segment.
    if (D.isTls() && !Config->Relocatable) {
      if (!Out<ELFT>::TlsPhdr)
        fatal(toString(D.File) +
              " has a STT_TLS symbol but doesn't have a PT_TLS section");
      return VA - Out<ELFT>::TlsPhdr->p_vaddr;
    }
    return VA;
  }
  case SymbolBody::DefinedCommonKind:
    return In<ELFT>::Common->OutSec->Addr + In<ELFT>::Common->OutSecOff +
           cast<DefinedCommon>(Body).Offset;
  case SymbolBody::SharedKind: {
    auto &SS = cast<SharedSymbol<ELFT>>(Body);
    if (!SS.NeedsCopyOrPltAddr)
      return 0;
    // Functions resolve to their PLT entry, everything else to its slot in
    // .bss.
    if (SS.isFunc())
      return Body.getPltVA<ELFT>();
    return Out<ELFT>::Bss->Addr + SS.OffsetInBss;
  }
  case SymbolBody::UndefinedKind:
    return 0;
  case SymbolBody::LazyArchiveKind:
  case SymbolBody::LazyObjectKind:
    assert(Body.symbol()->IsUsedInRegularObj && "lazy symbol reached writer");
    return 0;
  }
  llvm_unreachable("invalid symbol kind");
}

SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
                       uint8_t Type)
    : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal),
      IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false),
      IsInIgot(false), Type(Type), StOther(StOther), Name(Name) {}

// Returns true if a symbol can be replaced at load-time by a symbol
// with the same name defined in other ELF executable or DSO.
bool SymbolBody::isPreemptible() const {
  if (isLocal())
    return false;

  // Shared symbols resolve to the definition in the DSO. The exceptions are
  // symbols with copy relocations (which resolve to .bss) or preempt plt
  // entries (which resolve to that plt entry).
  if (isShared())
    return !NeedsCopyOrPltAddr;

  // That's all that can be preempted in a non-DSO.
  if (!Config->Shared)
    return false;

  // Only symbols that appear in dynsym can be preempted.
  if (!symbol()->includeInDynsym())
    return false;

  // Only default visibility symbols can be preempted.
  if (symbol()->Visibility != STV_DEFAULT)
    return false;

  // -Bsymbolic means that definitions are not preempted.
  if (Config->Bsymbolic || (Config->BsymbolicFunctions && isFunc()))
    return !isDefined();
  return true;
}

// Returns true if thunk data is attached to this symbol. Only regular and
// shared symbols are checked.
template <class ELFT> bool SymbolBody::hasThunk() const {
  if (auto *DR = dyn_cast<DefinedRegular<ELFT>>(this))
    return DR->ThunkData != nullptr;
  if (auto *S = dyn_cast<SharedSymbol<ELFT>>(this))
    return S->ThunkData != nullptr;
  return false;
}

// Returns the symbol's address plus Addend (see getSymVA for how the addend
// of a section symbol is handled).
template <class ELFT>
typename ELFT::uint SymbolBody::getVA(typename ELFT::uint Addend) const {
  typename ELFT::uint OutVA = getSymVA<ELFT>(*this, Addend);
  return OutVA + Addend;
}

template <class ELFT> typename ELFT::uint SymbolBody::getGotVA() const {
  return In<ELFT>::Got->getVA() + getGotOffset<ELFT>();
}

template <class ELFT> typename ELFT::uint SymbolBody::getGotOffset() const {
  return GotIndex * Target->GotEntrySize;
}

// The entry lives in IgotPlt when IsInIgot is set, and in GotPlt otherwise.
template <class ELFT> typename ELFT::uint SymbolBody::getGotPltVA() const {
  if (this->IsInIgot)
    return In<ELFT>::IgotPlt->getVA() + getGotPltOffset<ELFT>();
  return In<ELFT>::GotPlt->getVA() + getGotPltOffset<ELFT>();
}

template <class ELFT> typename ELFT::uint SymbolBody::getGotPltOffset() const {
  return GotPltIndex * Target->GotPltEntrySize;
}

// The entry lives in Iplt when IsInIplt is set. Entries in the regular Plt
// are additionally offset by the PLT header size.
template <class ELFT> typename ELFT::uint SymbolBody::getPltVA() const {
  if (this->IsInIplt)
    return In<ELFT>::Iplt->getVA() + PltIndex * Target->PltEntrySize;
  return In<ELFT>::Plt->getVA() + Target->PltHeaderSize +
         PltIndex * Target->PltEntrySize;
}

// Returns the address of the symbol's thunk. ThunkData is dereferenced
// without a null check. Any symbol kind other than regular, shared or
// undefined is a fatal error.
template <class ELFT> typename ELFT::uint SymbolBody::getThunkVA() const {
  if (const auto *DR = dyn_cast<DefinedRegular<ELFT>>(this))
    return DR->ThunkData->getVA();
  if (const auto *S = dyn_cast<SharedSymbol<ELFT>>(this))
    return S->ThunkData->getVA();
  if (const auto *S = dyn_cast<Undefined<ELFT>>(this))
    return S->ThunkData->getVA();
  fatal("getThunkVA() not supported for Symbol class\n");
}

// Returns the symbol's size for common, regular and shared symbols, and 0 for
// every other kind.
template <class ELFT> typename ELFT::uint SymbolBody::getSize() const {
  if (const auto *C = dyn_cast<DefinedCommon>(this))
    return C->Size;
  if (const auto *DR = dyn_cast<DefinedRegular<ELFT>>(this))
    return DR->Size;
  if (const auto *S = dyn_cast<SharedSymbol<ELFT>>(this))
    return S->Sym.st_size;
  return 0;
}

// If a symbol name contains '@', the characters after that is
// a symbol version name. This function parses that.
void SymbolBody::parseSymbolVersion() {
  StringRef S = getName();
  size_t Pos = S.find('@');
  // Nothing to do if there is no '@' or if the name starts with it.
  if (Pos == 0 || Pos == StringRef::npos)
    return;
  StringRef Verstr = S.substr(Pos + 1);
  if (Verstr.empty())
    return;

  // Truncate the symbol name so that it doesn't include the version string.
  Name = {S.data(), Pos};

  // If this is an undefined or shared symbol it is not a definition.
  if (isUndefined() || isShared())
    return;

  // '@@' in a symbol name means the default version.
  // It is usually the most recent one.
  bool IsDefault = (Verstr[0] == '@');
  if (IsDefault)
    Verstr = Verstr.substr(1);

  for (VersionDefinition &Ver : Config->VersionDefinitions) {
    if (Ver.Name != Verstr)
      continue;

    // A non-default version is additionally marked with VERSYM_HIDDEN.
    if (IsDefault)
      symbol()->VersionId = Ver.Id;
    else
      symbol()->VersionId = Ver.Id | VERSYM_HIDDEN;
    return;
  }

  // It is an error if the specified version is not defined.
  error(toString(File) + ": symbol " + S + " has undefined version " + Verstr);
}

Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
                 uint8_t Type)
    : SymbolBody(K, Name, IsLocal, StOther, Type) {}

// Only a function defined in a section can qualify. It does so if either its
// own st_other bits or its file's ELF header flags mark it as PIC.
template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const {
  if (!Section || !isFunc())
    return false;
  return (this->StOther & STO_MIPS_MIPS16) == STO_MIPS_PIC ||
         (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC);
}

template <typename ELFT>
Undefined<ELFT>::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther,
                           uint8_t Type, InputFile *File)
    : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) {
  this->File = File;
}

DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment,
                             uint8_t StOther, uint8_t Type, InputFile *File)
    : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther,
              Type),
      Alignment(Alignment), Size(Size) {
  this->File = File;
}

// Forward to the fetch() of the concrete lazy symbol kind.
InputFile *Lazy::fetch() {
  if (auto *S = dyn_cast<LazyArchive>(this))
    return S->fetch();
  return cast<LazyObject>(this)->fetch();
}

LazyArchive::LazyArchive(ArchiveFile &File,
                         const llvm::object::Archive::Symbol S, uint8_t Type)
    : Lazy(LazyArchiveKind, S.getName(), Type), Sym(S) {
  this->File = &File;
}

LazyObject::LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type)
    : Lazy(LazyObjectKind, Name, Type) {
  this->File = &File;
}

// Returns the archive member that defines this symbol as a new input file, or
// nullptr if there is nothing left to load.
InputFile *LazyArchive::fetch() {
  std::pair<MemoryBufferRef, uint64_t> MBInfo = file()->getMember(&Sym);

  // getMember returns an empty buffer if the member was already
  // read from the library.
  if (MBInfo.first.getBuffer().empty())
    return nullptr;
  return createObjectFile(MBInfo.first, file()->getName(), MBInfo.second);
}

// Returns the lazy object as a new input file, or nullptr if its buffer is
// empty.
InputFile *LazyObject::fetch() {
  MemoryBufferRef MBRef = file()->getBuffer();
  if (MBRef.getBuffer().empty())
    return nullptr;
  return createObjectFile(MBRef);
}

// Only default and protected visibility symbols are candidates. Of those, a
// symbol is included if it is exported and not versioned as local, if it is a
// shared symbol, or if it is undefined while linking a shared object.
bool Symbol::includeInDynsym() const {
  if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED)
    return false;
  return (ExportDynamic && VersionId != VER_NDX_LOCAL) || body()->isShared() ||
         (body()->isUndefined() && Config->Shared);
}

// Print out a log message for --trace-symbol.
void elf::printTraceSymbol(Symbol *Sym) {
  SymbolBody *B = Sym->body();
  outs() << toString(B->File);

  if (B->isUndefined())
    outs() << ": reference to ";
  else if (B->isCommon())
    outs() << ": common definition of ";
  else
    outs() << ": definition of ";
  outs() << B->getName() << "\n";
}

// Returns a symbol for an error message.
// The name is demangled when Config->Demangle is set and demangling succeeds;
// otherwise the name is returned unchanged.
std::string lld::toString(const SymbolBody &B) {
  if (Config->Demangle)
    if (Optional<std::string> S = demangle(B.getName()))
      return *S;
  return B.getName();
}

template bool SymbolBody::hasThunk<ELF32LE>() const;
template bool SymbolBody::hasThunk<ELF32BE>() const;
template bool SymbolBody::hasThunk<ELF64LE>() const;
template bool SymbolBody::hasThunk<ELF64BE>() const;

template uint32_t SymbolBody::template getVA<ELF32LE>(uint32_t) const;
template uint32_t SymbolBody::template getVA<ELF32BE>(uint32_t) const;
template uint64_t SymbolBody::template getVA<ELF64LE>(uint64_t) const;
template uint64_t SymbolBody::template getVA<ELF64BE>(uint64_t) const;

template uint32_t SymbolBody::template getGotVA<ELF32LE>() const;
template uint32_t SymbolBody::template getGotVA<ELF32BE>() const;
template uint64_t SymbolBody::template getGotVA<ELF64LE>() const;
template uint64_t SymbolBody::template getGotVA<ELF64BE>() const;

template uint32_t SymbolBody::template getGotOffset<ELF32LE>() const;
template uint32_t
SymbolBody::template getGotOffset<ELF32BE>() const; +template uint64_t SymbolBody::template getGotOffset<ELF64LE>() const; +template uint64_t SymbolBody::template getGotOffset<ELF64BE>() const; + +template uint32_t SymbolBody::template getGotPltVA<ELF32LE>() const; +template uint32_t SymbolBody::template getGotPltVA<ELF32BE>() const; +template uint64_t SymbolBody::template getGotPltVA<ELF64LE>() const; +template uint64_t SymbolBody::template getGotPltVA<ELF64BE>() const; + +template uint32_t SymbolBody::template getThunkVA<ELF32LE>() const; +template uint32_t SymbolBody::template getThunkVA<ELF32BE>() const; +template uint64_t SymbolBody::template getThunkVA<ELF64LE>() const; +template uint64_t SymbolBody::template getThunkVA<ELF64BE>() const; + +template uint32_t SymbolBody::template getGotPltOffset<ELF32LE>() const; +template uint32_t SymbolBody::template getGotPltOffset<ELF32BE>() const; +template uint64_t SymbolBody::template getGotPltOffset<ELF64LE>() const; +template uint64_t SymbolBody::template getGotPltOffset<ELF64BE>() const; + +template uint32_t SymbolBody::template getPltVA<ELF32LE>() const; +template uint32_t SymbolBody::template getPltVA<ELF32BE>() const; +template uint64_t SymbolBody::template getPltVA<ELF64LE>() const; +template uint64_t SymbolBody::template getPltVA<ELF64BE>() const; + +template uint32_t SymbolBody::template getSize<ELF32LE>() const; +template uint32_t SymbolBody::template getSize<ELF32BE>() const; +template uint64_t SymbolBody::template getSize<ELF64LE>() const; +template uint64_t SymbolBody::template getSize<ELF64BE>() const; + +template class elf::Undefined<ELF32LE>; +template class elf::Undefined<ELF32BE>; +template class elf::Undefined<ELF64LE>; +template class elf::Undefined<ELF64BE>; + +template class elf::DefinedRegular<ELF32LE>; +template class elf::DefinedRegular<ELF32BE>; +template class elf::DefinedRegular<ELF64LE>; +template class elf::DefinedRegular<ELF64BE>; diff --git a/contrib/llvm/tools/lld/ELF/Symbols.h 
b/contrib/llvm/tools/lld/ELF/Symbols.h new file mode 100644 index 000000000000..38889571679c --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Symbols.h @@ -0,0 +1,461 @@ +//===- Symbols.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// All symbols are handled as SymbolBodies regardless of their types. +// This file defines various types of SymbolBodies. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SYMBOLS_H +#define LLD_ELF_SYMBOLS_H + +#include "InputSection.h" +#include "Strings.h" + +#include "lld/Core/LLVM.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELF.h" + +namespace lld { +namespace elf { + +class ArchiveFile; +class BitcodeFile; +class InputFile; +class LazyObjectFile; +template <class ELFT> class ObjectFile; +template <class ELFT> class OutputSection; +class OutputSectionBase; +template <class ELFT> class SharedFile; + +struct Symbol; + +// The base class for real symbol classes. 
+class SymbolBody { +public: + enum Kind { + DefinedFirst, + DefinedRegularKind = DefinedFirst, + SharedKind, + DefinedCommonKind, + DefinedSyntheticKind, + DefinedLast = DefinedSyntheticKind, + UndefinedKind, + LazyArchiveKind, + LazyObjectKind, + }; + + SymbolBody(Kind K) : SymbolKind(K) {} + + Symbol *symbol(); + const Symbol *symbol() const { + return const_cast<SymbolBody *>(this)->symbol(); + } + + Kind kind() const { return static_cast<Kind>(SymbolKind); } + + bool isUndefined() const { return SymbolKind == UndefinedKind; } + bool isDefined() const { return SymbolKind <= DefinedLast; } + bool isCommon() const { return SymbolKind == DefinedCommonKind; } + bool isLazy() const { + return SymbolKind == LazyArchiveKind || SymbolKind == LazyObjectKind; + } + bool isShared() const { return SymbolKind == SharedKind; } + bool isLocal() const { return IsLocal; } + bool isPreemptible() const; + StringRef getName() const { return Name; } + uint8_t getVisibility() const { return StOther & 0x3; } + void parseSymbolVersion(); + + bool isInGot() const { return GotIndex != -1U; } + bool isInPlt() const { return PltIndex != -1U; } + template <class ELFT> bool hasThunk() const; + + template <class ELFT> + typename ELFT::uint getVA(typename ELFT::uint Addend = 0) const; + + template <class ELFT> typename ELFT::uint getGotOffset() const; + template <class ELFT> typename ELFT::uint getGotVA() const; + template <class ELFT> typename ELFT::uint getGotPltOffset() const; + template <class ELFT> typename ELFT::uint getGotPltVA() const; + template <class ELFT> typename ELFT::uint getPltVA() const; + template <class ELFT> typename ELFT::uint getThunkVA() const; + template <class ELFT> typename ELFT::uint getSize() const; + + // The file from which this symbol was created. 
+ InputFile *File = nullptr; + + uint32_t DynsymIndex = 0; + uint32_t GotIndex = -1; + uint32_t GotPltIndex = -1; + uint32_t PltIndex = -1; + uint32_t GlobalDynIndex = -1; + +protected: + SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type); + + const unsigned SymbolKind : 8; + +public: + // True if the linker has to generate a copy relocation for this shared + // symbol or if the symbol should point to its plt entry. + unsigned NeedsCopyOrPltAddr : 1; + + // True if this is a local symbol. + unsigned IsLocal : 1; + + // True if this symbol has an entry in the global part of MIPS GOT. + unsigned IsInGlobalMipsGot : 1; + + // True if this symbol is referenced by 32-bit GOT relocations. + unsigned Is32BitMipsGot : 1; + + // True if this symbol is in the Iplt sub-section of the Plt. + unsigned IsInIplt : 1; + + // True if this symbol is in the Igot sub-section of the .got.plt or .got. + unsigned IsInIgot : 1; + + // The following fields have the same meaning as the ELF symbol attributes. + uint8_t Type; // symbol type + uint8_t StOther; // st_other field value + + // The Type field may also have this value. It means that we have not yet seen + // a non-Lazy symbol with this name, so we don't know what its type is. The + // Type field is normally set to this value for Lazy symbols unless we saw a + // weak undefined symbol first, in which case we need to remember the original + // symbol's type in order to check for TLS mismatches. + enum { UnknownType = 255 }; + + bool isSection() const { return Type == llvm::ELF::STT_SECTION; } + bool isTls() const { return Type == llvm::ELF::STT_TLS; } + bool isFunc() const { return Type == llvm::ELF::STT_FUNC; } + bool isGnuIFunc() const { return Type == llvm::ELF::STT_GNU_IFUNC; } + bool isObject() const { return Type == llvm::ELF::STT_OBJECT; } + bool isFile() const { return Type == llvm::ELF::STT_FILE; } + +protected: + StringRefZ Name; +}; + +// The base class for any defined symbols. 
+class Defined : public SymbolBody { +public: + Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type); + static bool classof(const SymbolBody *S) { return S->isDefined(); } +}; + +class DefinedCommon : public Defined { +public: + DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t StOther, + uint8_t Type, InputFile *File); + + static bool classof(const SymbolBody *S) { + return S->kind() == SymbolBody::DefinedCommonKind; + } + + // The output offset of this common symbol in the output bss. Computed by the + // writer. + uint64_t Offset; + + // The maximum alignment we have seen for this symbol. + uint64_t Alignment; + + uint64_t Size; +}; + +// Regular defined symbols read from object file symbol tables. +template <class ELFT> class DefinedRegular : public Defined { + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::uint uintX_t; + +public: + DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, + uintX_t Value, uintX_t Size, InputSectionBase<ELFT> *Section, + InputFile *File) + : Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type), + Value(Value), Size(Size), + Section(Section ? Section->Repl : NullInputSection) { + this->File = File; + } + + // Return true if the symbol is a PIC function. + bool isMipsPIC() const; + + static bool classof(const SymbolBody *S) { + return S->kind() == SymbolBody::DefinedRegularKind; + } + + uintX_t Value; + uintX_t Size; + + // The input section this symbol belongs to. Notice that this is + // a reference to a pointer. We are using two levels of indirections + // because of ICF. If ICF decides two sections need to be merged, it + // manipulates this Section pointers so that they point to the same + // section. This is a bit tricky, so be careful to not be confused. + // If this is null, the symbol is an absolute symbol. 
+ InputSectionBase<ELFT> *&Section; + + // If non-null the symbol has a Thunk that may be used as an alternative + // destination for callers of this Symbol. + Thunk<ELFT> *ThunkData = nullptr; + +private: + static InputSectionBase<ELFT> *NullInputSection; +}; + +template <class ELFT> +InputSectionBase<ELFT> *DefinedRegular<ELFT>::NullInputSection; + +// DefinedSynthetic is a class to represent linker-generated ELF symbols. +// The difference from the regular symbol is that DefinedSynthetic symbols +// don't belong to any input files or sections. Thus, its constructor +// takes an output section to calculate output VA, etc. +// If Section is null, this symbol is relative to the image base. +class DefinedSynthetic : public Defined { +public: + DefinedSynthetic(StringRef Name, uint64_t Value, + const OutputSectionBase *Section) + : Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false, + llvm::ELF::STV_HIDDEN, 0 /* Type */), + Value(Value), Section(Section) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == SymbolBody::DefinedSyntheticKind; + } + + uint64_t Value; + const OutputSectionBase *Section; +}; + +template <class ELFT> class Undefined : public SymbolBody { +public: + Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, + InputFile *F); + + static bool classof(const SymbolBody *S) { + return S->kind() == UndefinedKind; + } + + // If non-null the symbol has a Thunk that may be used as an alternative + // destination for callers of this Symbol. When linking a DSO undefined + // symbols are implicitly imported, the symbol lookup will be performed by + // the dynamic loader. A call to an undefined symbol will be given a PLT + // entry and on ARM this may need a Thunk if the caller is in Thumb state. 
+ Thunk<ELFT> *ThunkData = nullptr; + InputFile *file() { return this->File; } +}; + +template <class ELFT> class SharedSymbol : public Defined { + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Verdef Elf_Verdef; + typedef typename ELFT::uint uintX_t; + +public: + static bool classof(const SymbolBody *S) { + return S->kind() == SymbolBody::SharedKind; + } + + SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym, + const Elf_Verdef *Verdef) + : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other, + Sym.getType()), + Sym(Sym), Verdef(Verdef) { + // IFuncs defined in DSOs are treated as functions by the static linker. + if (isGnuIFunc()) + Type = llvm::ELF::STT_FUNC; + this->File = F; + } + + SharedFile<ELFT> *file() { return (SharedFile<ELFT> *)this->File; } + + const Elf_Sym &Sym; + + // This field is a pointer to the symbol's version definition. + const Elf_Verdef *Verdef; + + // OffsetInBss is significant only when needsCopy() is true. + uintX_t OffsetInBss = 0; + + // If non-null the symbol has a Thunk that may be used as an alternative + // destination for callers of this Symbol. + Thunk<ELFT> *ThunkData = nullptr; + bool needsCopy() const { return this->NeedsCopyOrPltAddr && !this->isFunc(); } +}; + +// This class represents a symbol defined in an archive file. It is +// created from an archive file header, and it knows how to load an +// object file from an archive to replace itself with a defined +// symbol. If the resolver finds both Undefined and Lazy for +// the same name, it will ask the Lazy to load a file. +class Lazy : public SymbolBody { +public: + static bool classof(const SymbolBody *S) { return S->isLazy(); } + + // Returns an object file for this symbol, or a nullptr if the file + // was already returned. 
+ InputFile *fetch(); + +protected: + Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type) + : SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {} +}; + +// LazyArchive symbols represent symbols in archive files. +class LazyArchive : public Lazy { +public: + LazyArchive(ArchiveFile &File, const llvm::object::Archive::Symbol S, + uint8_t Type); + + static bool classof(const SymbolBody *S) { + return S->kind() == LazyArchiveKind; + } + + ArchiveFile *file() { return (ArchiveFile *)this->File; } + InputFile *fetch(); + +private: + const llvm::object::Archive::Symbol Sym; +}; + +// LazyObject symbols represent symbols in object files between +// --start-lib and --end-lib options. +class LazyObject : public Lazy { +public: + LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type); + + static bool classof(const SymbolBody *S) { + return S->kind() == LazyObjectKind; + } + + LazyObjectFile *file() { return (LazyObjectFile *)this->File; } + InputFile *fetch(); +}; + +// Some linker-generated symbols need to be created as +// DefinedRegular symbols. +template <class ELFT> struct ElfSym { + // The content for __ehdr_start symbol. + static DefinedRegular<ELFT> *EhdrStart; + + // The content for _etext and etext symbols. + static DefinedRegular<ELFT> *Etext; + static DefinedRegular<ELFT> *Etext2; + + // The content for _edata and edata symbols. + static DefinedRegular<ELFT> *Edata; + static DefinedRegular<ELFT> *Edata2; + + // The content for _end and end symbols. + static DefinedRegular<ELFT> *End; + static DefinedRegular<ELFT> *End2; + + // The content for _gp_disp/__gnu_local_gp symbols for MIPS target. 
+ static DefinedRegular<ELFT> *MipsGpDisp; + static DefinedRegular<ELFT> *MipsLocalGp; + static DefinedRegular<ELFT> *MipsGp; +}; + +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::EhdrStart; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext2; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata2; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End2; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsGpDisp; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsLocalGp; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsGp; + +// A real symbol object, SymbolBody, is usually stored within a Symbol. There's +// always one Symbol for each symbol name. The resolver updates the SymbolBody +// stored in the Body field of this object as it resolves symbols. Symbol also +// holds computed properties of symbol names. +struct Symbol { + // Symbol binding. This is on the Symbol to track changes during resolution. + // In particular: + // An undefined weak is still weak when it resolves to a shared library. + // An undefined weak will not fetch archive members, but we have to remember + // it is weak. + uint8_t Binding; + + // Version definition index. + uint16_t VersionId; + + // Symbol visibility. This is the computed minimum visibility of all + // observed non-DSO symbols. + unsigned Visibility : 2; + + // True if the symbol was used for linking and thus needs to be added to the + // output file's symbol table. This is true for all symbols except for + // unreferenced DSO symbols and bitcode symbols that are unreferenced except + // by other bitcode objects. + unsigned IsUsedInRegularObj : 1; + + // If this flag is true and the symbol has protected or default visibility, it + // will appear in .dynsym. 
This flag is set by interposable DSO symbols in + // executables, by most symbols in DSOs and executables built with + // --export-dynamic, and by dynamic lists. + unsigned ExportDynamic : 1; + + // True if this symbol is specified by --trace-symbol option. + unsigned Traced : 1; + + // This symbol version was found in a version script. + unsigned InVersionScript : 1; + + bool includeInDynsym() const; + bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; } + + // This field is used to store the Symbol's SymbolBody. This instantiation of + // AlignedCharArrayUnion gives us a struct with a char array field that is + // large and aligned enough to store any derived class of SymbolBody. We + // assume that the size and alignment of ELF64LE symbols is sufficient for any + // ELFT, and we verify this with the static_asserts in replaceBody. + llvm::AlignedCharArrayUnion< + DefinedCommon, DefinedRegular<llvm::object::ELF64LE>, DefinedSynthetic, + Undefined<llvm::object::ELF64LE>, SharedSymbol<llvm::object::ELF64LE>, + LazyArchive, LazyObject> + Body; + + SymbolBody *body() { return reinterpret_cast<SymbolBody *>(Body.buffer); } + const SymbolBody *body() const { return const_cast<Symbol *>(this)->body(); } +}; + +void printTraceSymbol(Symbol *Sym); + +template <typename T, typename... ArgT> +void replaceBody(Symbol *S, ArgT &&... Arg) { + static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); + static_assert(alignof(T) <= alignof(decltype(S->Body)), + "Body not aligned enough"); + assert(static_cast<SymbolBody *>(static_cast<T *>(nullptr)) == nullptr && + "Not a SymbolBody"); + + new (S->Body.buffer) T(std::forward<ArgT>(Arg)...); + + // Print out a log message if --trace-symbol was specified. + // This is for debugging. 
+ if (S->Traced) + printTraceSymbol(S); +} + +inline Symbol *SymbolBody::symbol() { + assert(!isLocal()); + return reinterpret_cast<Symbol *>(reinterpret_cast<char *>(this) - + offsetof(Symbol, Body)); +} +} // namespace elf + +std::string toString(const elf::SymbolBody &B); +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp new file mode 100644 index 000000000000..3c8a439ba308 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp @@ -0,0 +1,1990 @@ +//===- SyntheticSections.cpp ----------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains linker-synthesized sections. Currently, +// synthetic sections are created as either output sections or input sections, +// but we are rewriting code so that all synthetic sections are created as +// input sections. 
+// +//===----------------------------------------------------------------------===// + +#include "SyntheticSections.h" +#include "Config.h" +#include "Error.h" +#include "InputFiles.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "Target.h" +#include "Threads.h" +#include "Writer.h" +#include "lld/Config/Version.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/xxhash.h" +#include <cstdlib> + +using namespace llvm; +using namespace llvm::dwarf; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +template <class ELFT> static std::vector<DefinedCommon *> getCommonSymbols() { + std::vector<DefinedCommon *> V; + for (Symbol *S : Symtab<ELFT>::X->getSymbols()) + if (auto *B = dyn_cast<DefinedCommon>(S->body())) + V.push_back(B); + return V; +} + +// Find all common symbols and allocate space for them. +template <class ELFT> InputSection<ELFT> *elf::createCommonSection() { + auto *Ret = make<InputSection<ELFT>>(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, 1, + ArrayRef<uint8_t>(), "COMMON"); + Ret->Live = true; + + // Sort the common symbols by alignment as an heuristic to pack them better. + std::vector<DefinedCommon *> Syms = getCommonSymbols<ELFT>(); + std::stable_sort(Syms.begin(), Syms.end(), + [](const DefinedCommon *A, const DefinedCommon *B) { + return A->Alignment > B->Alignment; + }); + + // Assign offsets to symbols. + size_t Size = 0; + size_t Alignment = 1; + for (DefinedCommon *Sym : Syms) { + Alignment = std::max<size_t>(Alignment, Sym->Alignment); + Size = alignTo(Size, Sym->Alignment); + + // Compute symbol offset relative to beginning of input section. 
+ Sym->Offset = Size; + Size += Sym->Size; + } + Ret->Alignment = Alignment; + Ret->Data = makeArrayRef<uint8_t>(nullptr, Size); + return Ret; +} + +// Returns an LLD version string. +static ArrayRef<uint8_t> getVersion() { + // Check LLD_VERSION first for ease of testing. + // You can get consistent output by using the environment variable. + // This is only for testing. + StringRef S = getenv("LLD_VERSION"); + if (S.empty()) + S = Saver.save(Twine("Linker: ") + getLLDVersion()); + + // +1 to include the terminating '\0'. + return {(const uint8_t *)S.data(), S.size() + 1}; +} + +// Creates a .comment section containing LLD version info. +// With this feature, you can identify LLD-generated binaries easily +// by "objdump -s -j .comment <file>". +// The returned object is a mergeable string section. +template <class ELFT> MergeInputSection<ELFT> *elf::createCommentSection() { + typename ELFT::Shdr Hdr = {}; + Hdr.sh_flags = SHF_MERGE | SHF_STRINGS; + Hdr.sh_type = SHT_PROGBITS; + Hdr.sh_entsize = 1; + Hdr.sh_addralign = 1; + + auto *Ret = make<MergeInputSection<ELFT>>(/*file=*/nullptr, &Hdr, ".comment"); + Ret->Data = getVersion(); + Ret->splitIntoPieces(); + return Ret; +} + +// .MIPS.abiflags section. 
+template <class ELFT> +MipsAbiFlagsSection<ELFT>::MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags) + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"), + Flags(Flags) {} + +template <class ELFT> void MipsAbiFlagsSection<ELFT>::writeTo(uint8_t *Buf) { + memcpy(Buf, &Flags, sizeof(Flags)); +} + +template <class ELFT> +MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() { + Elf_Mips_ABIFlags Flags = {}; + bool Create = false; + + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + if (!Sec->Live || Sec->Type != SHT_MIPS_ABIFLAGS) + continue; + Sec->Live = false; + Create = true; + + std::string Filename = toString(Sec->getFile()); + const size_t Size = Sec->Data.size(); + // Older versions of BFD (such as the default FreeBSD linker) concatenate + // .MIPS.abiflags instead of merging. To allow for this case (or potential + // zero padding) we ignore everything after the first Elf_Mips_ABIFlags. + if (Size < sizeof(Elf_Mips_ABIFlags)) { + error(Filename + ": invalid size of .MIPS.abiflags section: got " + + Twine(Size) + " instead of " + Twine(sizeof(Elf_Mips_ABIFlags))); + return nullptr; + } + auto *S = reinterpret_cast<const Elf_Mips_ABIFlags *>(Sec->Data.data()); + if (S->version != 0) { + error(Filename + ": unexpected .MIPS.abiflags version " + + Twine(S->version)); + return nullptr; + } + + // LLD checks ISA compatibility in getMipsEFlags(). Here we just + // select the highest number of ISA/Rev/Ext. 
+ Flags.isa_level = std::max(Flags.isa_level, S->isa_level); + Flags.isa_rev = std::max(Flags.isa_rev, S->isa_rev); + Flags.isa_ext = std::max(Flags.isa_ext, S->isa_ext); + Flags.gpr_size = std::max(Flags.gpr_size, S->gpr_size); + Flags.cpr1_size = std::max(Flags.cpr1_size, S->cpr1_size); + Flags.cpr2_size = std::max(Flags.cpr2_size, S->cpr2_size); + Flags.ases |= S->ases; + Flags.flags1 |= S->flags1; + Flags.flags2 |= S->flags2; + Flags.fp_abi = elf::getMipsFpAbiFlag(Flags.fp_abi, S->fp_abi, Filename); + }; + + if (Create) + return make<MipsAbiFlagsSection<ELFT>>(Flags); + return nullptr; +} + +// .MIPS.options section. +template <class ELFT> +MipsOptionsSection<ELFT>::MipsOptionsSection(Elf_Mips_RegInfo Reginfo) + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"), + Reginfo(Reginfo) {} + +template <class ELFT> void MipsOptionsSection<ELFT>::writeTo(uint8_t *Buf) { + auto *Options = reinterpret_cast<Elf_Mips_Options *>(Buf); + Options->kind = ODK_REGINFO; + Options->size = getSize(); + + if (!Config->Relocatable) + Reginfo.ri_gp_value = In<ELFT>::MipsGot->getGp(); + memcpy(Buf + sizeof(Elf_Mips_Options), &Reginfo, sizeof(Reginfo)); +} + +template <class ELFT> +MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() { + // N64 ABI only. 
+ if (!ELFT::Is64Bits) + return nullptr; + + Elf_Mips_RegInfo Reginfo = {}; + bool Create = false; + + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + if (!Sec->Live || Sec->Type != SHT_MIPS_OPTIONS) + continue; + Sec->Live = false; + Create = true; + + std::string Filename = toString(Sec->getFile()); + ArrayRef<uint8_t> D = Sec->Data; + + while (!D.empty()) { + if (D.size() < sizeof(Elf_Mips_Options)) { + error(Filename + ": invalid size of .MIPS.options section"); + break; + } + + auto *Opt = reinterpret_cast<const Elf_Mips_Options *>(D.data()); + if (Opt->kind == ODK_REGINFO) { + if (Config->Relocatable && Opt->getRegInfo().ri_gp_value) + error(Filename + ": unsupported non-zero ri_gp_value"); + Reginfo.ri_gprmask |= Opt->getRegInfo().ri_gprmask; + Sec->getFile()->MipsGp0 = Opt->getRegInfo().ri_gp_value; + break; + } + + if (!Opt->size) + fatal(Filename + ": zero option descriptor size"); + D = D.slice(Opt->size); + } + }; + + if (Create) + return make<MipsOptionsSection<ELFT>>(Reginfo); + return nullptr; +} + +// MIPS .reginfo section. +template <class ELFT> +MipsReginfoSection<ELFT>::MipsReginfoSection(Elf_Mips_RegInfo Reginfo) + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"), + Reginfo(Reginfo) {} + +template <class ELFT> void MipsReginfoSection<ELFT>::writeTo(uint8_t *Buf) { + if (!Config->Relocatable) + Reginfo.ri_gp_value = In<ELFT>::MipsGot->getGp(); + memcpy(Buf, &Reginfo, sizeof(Reginfo)); +} + +template <class ELFT> +MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() { + // Section should be alive for O32 and N32 ABIs only. 
// Nothing is created when ELFT is a 64-bit ELF type.
  if (ELFT::Is64Bits)
    return nullptr;

  Elf_Mips_RegInfo Reginfo = {};
  bool Create = false;

  for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) {
    if (!Sec->Live || Sec->Type != SHT_MIPS_REGINFO)
      continue;
    Sec->Live = false;
    Create = true;

    if (Sec->Data.size() != sizeof(Elf_Mips_RegInfo)) {
      error(toString(Sec->getFile()) + ": invalid size of .reginfo section");
      return nullptr;
    }
    auto *R = reinterpret_cast<const Elf_Mips_RegInfo *>(Sec->Data.data());
    if (Config->Relocatable && R->ri_gp_value)
      error(toString(Sec->getFile()) + ": unsupported non-zero ri_gp_value");

    Reginfo.ri_gprmask |= R->ri_gprmask;
    Sec->getFile()->MipsGp0 = R->ri_gp_value;
  };

  if (Create)
    return make<MipsReginfoSection<ELFT>>(Reginfo);
  return nullptr;
}

// Creates the ".interp" input section. Its contents are the bytes of
// Config->DynamicLinker followed by a terminating NUL.
template <class ELFT> InputSection<ELFT> *elf::createInterpSection() {
  auto *Ret = make<InputSection<ELFT>>(SHF_ALLOC, SHT_PROGBITS, 1,
                                       ArrayRef<uint8_t>(), ".interp");
  Ret->Live = true;

  // StringSaver guarantees that the returned string ends with '\0'.
  // That is why it is safe to expose S.size() + 1 bytes below.
  StringRef S = Saver.save(Config->DynamicLinker);
  Ret->Data = {(const uint8_t *)S.data(), S.size() + 1};
  return Ret;
}

// Returns the number of bytes of the build-id payload for the build-id
// kind selected in Config->BuildId. For Hexstring it is the length of the
// user-supplied byte vector.
static size_t getHashSize() {
  switch (Config->BuildId) {
  case BuildIdKind::Fast:
    return 8;
  case BuildIdKind::Md5:
  case BuildIdKind::Uuid:
    return 16;
  case BuildIdKind::Sha1:
    return 20;
  case BuildIdKind::Hexstring:
    return Config->BuildIdVector.size();
  default:
    llvm_unreachable("unknown BuildIdKind");
  }
}

// The payload size is fixed at construction time from the current
// configuration (see getHashSize()).
template <class ELFT>
BuildIdSection<ELFT>::BuildIdSection()
    : SyntheticSection<ELFT>(SHF_ALLOC, SHT_NOTE, 1, ".note.gnu.build-id"),
      HashSize(getHashSize()) {}

// Writes the 16-byte note header (three 32-bit words followed by the
// 4-byte name "GNU\0") and remembers in HashBuf where the payload starts.
// The payload itself is filled in later by writeBuildId().
template <class ELFT> void BuildIdSection<ELFT>::writeTo(uint8_t *Buf) {
  const endianness E = ELFT::TargetEndianness;
  write32<E>(Buf, 4);                   // Name size
  write32<E>(Buf + 4, HashSize);        // Content size
  write32<E>(Buf + 8, NT_GNU_BUILD_ID); // Type
  memcpy(Buf + 12, "GNU", 4);           // Name string
  HashBuf = Buf + 16;
}

// Splits one uint8 array into consecutive pieces of at most ChunkSize
// bytes. Only the last piece may be shorter than ChunkSize; an empty input
// yields an empty vector.
static std::vector<ArrayRef<uint8_t>> split(ArrayRef<uint8_t> Arr,
                                            size_t ChunkSize) {
  std::vector<ArrayRef<uint8_t>> Ret;
  while (Arr.size() > ChunkSize) {
    Ret.push_back(Arr.take_front(ChunkSize));
    Arr = Arr.drop_front(ChunkSize);
  }
  // Whatever is left (between 1 and ChunkSize bytes) is the final piece.
  if (!Arr.empty())
    Ret.push_back(Arr);
  return Ret;
}

// Computes a hash value of Data using a given hash function.
// In order to utilize multiple cores, we first split data into 1MB
// chunks, compute a hash for each chunk, and then compute a hash value
// of the hash values.
//
// HashFn must write exactly HashSize bytes to Dest: the per-chunk results
// are stored back to back in a buffer of Chunks.size() * HashSize bytes.
template <class ELFT>
void BuildIdSection<ELFT>::computeHash(
    llvm::ArrayRef<uint8_t> Data,
    std::function<void(uint8_t *Dest, ArrayRef<uint8_t> Arr)> HashFn) {
  std::vector<ArrayRef<uint8_t>> Chunks = split(Data, 1024 * 1024);
  std::vector<uint8_t> Hashes(Chunks.size() * HashSize);

  // Compute hash values.
  forLoop(0, Chunks.size(),
          [&](size_t I) { HashFn(Hashes.data() + I * HashSize, Chunks[I]); });

  // Write to the final output buffer.
  HashFn(HashBuf, Hashes);
}

// Fills in the build-id payload at HashBuf according to Config->BuildId.
// Buf is the data the hash-based kinds are computed over.
template <class ELFT>
void BuildIdSection<ELFT>::writeBuildId(ArrayRef<uint8_t> Buf) {
  switch (Config->BuildId) {
  case BuildIdKind::Fast:
    // The 64-bit hash is always stored little-endian, regardless of ELFT.
    computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
      write64le(Dest, xxHash64(toStringRef(Arr)));
    });
    break;
  case BuildIdKind::Md5:
    computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
      memcpy(Dest, MD5::hash(Arr).data(), 16);
    });
    break;
  case BuildIdKind::Sha1:
    computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
      memcpy(Dest, SHA1::hash(Arr).data(), 20);
    });
    break;
  case BuildIdKind::Uuid:
    // Buf is not used here: the payload is HashSize random bytes.
    if (getRandomBytes(HashBuf, HashSize))
      error("entropy source failure");
    break;
  case BuildIdKind::Hexstring:
    // Buf is not used here: the payload is copied from the configuration.
    memcpy(HashBuf, Config->BuildIdVector.data(), Config->BuildIdVector.size());
    break;
  default:
    llvm_unreachable("unknown BuildIdKind");
  }
}

template <class ELFT>
GotSection<ELFT>::GotSection()
    : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
                             Target->GotEntrySize, ".got") {}

// Assigns the next free GOT slot to Sym.
template <class ELFT> void GotSection<ELFT>::addEntry(SymbolBody &Sym) {
  Sym.GotIndex = NumEntries;
  ++NumEntries;
}

// Reserves a pair of GOT slots for Sym. Returns false, and reserves
// nothing, if Sym already has a GlobalDynIndex other than -1U.
template <class ELFT> bool GotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) {
  if (Sym.GlobalDynIndex != -1U)
    return false;
  Sym.GlobalDynIndex = NumEntries;
  // Global Dynamic TLS entries take two GOT slots.
  NumEntries += 2;
  return true;
}

// Reserves TLS entries for a TLS module ID and a TLS block offset.
// In total it takes two GOT slots.
template <class ELFT> bool GotSection<ELFT>::addTlsIndex() {
  // Any value other than uint32_t(-1) means the slots were reserved by an
  // earlier call; report that nothing new was added.
  if (TlsIndexOff != uint32_t(-1))
    return false;
  // Note that TlsIndexOff is a byte offset, not a slot index.
  TlsIndexOff = NumEntries * sizeof(uintX_t);
  NumEntries += 2;
  return true;
}

// Returns the virtual address of the first of the two slots recorded in
// B.GlobalDynIndex.
template <class ELFT>
typename GotSection<ELFT>::uintX_t
GotSection<ELFT>::getGlobalDynAddr(const SymbolBody &B) const {
  return this->getVA() + B.GlobalDynIndex * sizeof(uintX_t);
}

// Same as getGlobalDynAddr(), but as a byte offset from the start of the
// section.
template <class ELFT>
typename GotSection<ELFT>::uintX_t
GotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const {
  return B.GlobalDynIndex * sizeof(uintX_t);
}

// Every slot is sizeof(uintX_t) bytes wide.
template <class ELFT> void GotSection<ELFT>::finalize() {
  Size = NumEntries * sizeof(uintX_t);
}

template <class ELFT> bool GotSection<ELFT>::empty() const {
  // If we have a relocation that is relative to GOT (such as GOTOFFREL),
  // we need to emit a GOT even if it's empty.
  return NumEntries == 0 && !HasGotOffRel;
}

// Produces the section contents by delegating to relocate() over the whole
// [Buf, Buf + Size) range; nothing else is written here.
template <class ELFT> void GotSection<ELFT>::writeTo(uint8_t *Buf) {
  this->relocate(Buf, Buf + Size);
}

template <class ELFT>
MipsGotSection<ELFT>::MipsGotSection()
    : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL,
                             SHT_PROGBITS, Target->GotEntrySize, ".got") {}

// Registers a GOT entry for Sym (+ Addend) requested by a relocation with
// expression Expr. Depending on Expr and on the symbol, the request ends up
// in the page map, the TLS list, or one of the three entry lists
// (LocalEntries, LocalEntries32, GlobalEntries).
template <class ELFT>
void MipsGotSection<ELFT>::addEntry(SymbolBody &Sym, uintX_t Addend,
                                    RelExpr Expr) {
  // For "true" local symbols, which can be referenced from the same module
  // only, the compiler creates two instructions for address loading:
  //
  // lw   $8, 0($gp) # R_MIPS_GOT16
  // addi $8, $8, 0  # R_MIPS_LO16
  //
  // The first instruction loads the high 16 bits of the symbol address
  // while the second adds an offset. That allows reducing the number of
  // required GOT entries because only one global offset table entry is
  // necessary for every 64 KBytes of local data. So for local symbols we
  // need to allocate a number of GOT entries to hold all required "page"
  // addresses.
  //
  // All global symbols (hidden and regular) are treated by the compiler
  // uniformly. It always generates a single `lw` instruction and an
  // R_MIPS_GOT16 relocation to load the address of the symbol. So for each
  // such symbol we need to allocate a dedicated GOT entry to store its
  // address.
  //
  // If a symbol is preemptible we need the help of the dynamic linker to
  // get its final address. The corresponding GOT entries are allocated in
  // the "global" part of the GOT. Entries for non-preemptible global
  // symbols are allocated in the "local" part of the GOT.
  //
  // See "Global Offset Table" in Chapter 5:
  // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
  if (Expr == R_MIPS_GOT_LOCAL_PAGE) {
    // At this point we do not know the final symbol value, so to reduce the
    // number of allocated GOT entries do the following trick. Save all
    // output sections referenced by GOT relocations. Then later, in the
    // `finalize` method, calculate the number of "pages" required to cover
    // all saved output sections and allocate the appropriate number of GOT
    // entries.
    PageIndexMap.insert({cast<DefinedRegular<ELFT>>(&Sym)->Section->OutSec, 0});
    return;
  }
  if (Sym.isTls()) {
    // GOT entries created for MIPS TLS relocations behave almost like
    // GOT entries from other ABIs. They go to the end
    // of the global offset table.
    Sym.GotIndex = TlsEntries.size();
    TlsEntries.push_back(&Sym);
    return;
  }
  // Appends (S, A) to Items unless an equivalent entry already exists.
  // S.GotIndex is only recorded for a zero addend; entries with a non-zero
  // addend are found again through EntryIndexMap.
  auto AddEntry = [&](SymbolBody &S, uintX_t A, GotEntries &Items) {
    if (S.isInGot() && !A)
      return;
    size_t NewIndex = Items.size();
    if (!EntryIndexMap.insert({{&S, A}, NewIndex}).second)
      return;
    Items.emplace_back(&S, A);
    if (!A)
      S.GotIndex = NewIndex;
  };
  if (Sym.isPreemptible()) {
    // Ignore addends for preemptible symbols. They get a single GOT entry
    // anyway.
    AddEntry(Sym, 0, GlobalEntries);
    Sym.IsInGlobalMipsGot = true;
  } else if (Expr == R_MIPS_GOT_OFF32) {
    AddEntry(Sym, Addend, LocalEntries32);
    Sym.Is32BitMipsGot = true;
  } else {
    // Hold local GOT entries accessed via a 16-bit index separately.
    // That allows writing them at the beginning of the GOT and keeping
    // their indexes as small as possible to avoid relocation overflow.
    AddEntry(Sym, Addend, LocalEntries);
  }
}

// Reserves a pair of slots at the end of TlsEntries for Sym. Returns false,
// and reserves nothing, if Sym already has a GlobalDynIndex other than -1U.
template <class ELFT>
bool MipsGotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) {
  if (Sym.GlobalDynIndex != -1U)
    return false;
  Sym.GlobalDynIndex = TlsEntries.size();
  // Global Dynamic TLS entries take two GOT slots.
  // The first slot is a nullptr placeholder; the second one records Sym.
  TlsEntries.push_back(nullptr);
  TlsEntries.push_back(&Sym);
  return true;
}

// Reserves TLS entries for a TLS module ID and a TLS block offset.
// In total it takes two GOT slots.
template <class ELFT> bool MipsGotSection<ELFT>::addTlsIndex() {
  if (TlsIndexOff != uint32_t(-1))
    return false;
  // TlsIndexOff is a byte offset relative to the start of the TLS entries.
  TlsIndexOff = TlsEntries.size() * sizeof(uintX_t);
  TlsEntries.push_back(nullptr);
  TlsEntries.push_back(nullptr);
  return true;
}

// Adds 0x8000 to Addr and clears the low 16 bits of the sum.
static uint64_t getMipsPageAddr(uint64_t Addr) {
  return (Addr + 0x8000) & ~0xffff;
}

// Returns Size divided by 0xffff, rounded up, plus one. finalize() uses the
// result as an upper bound of page entries needed for an output section.
static uint64_t getMipsPageCount(uint64_t Size) {
  return (Size + 0xfffe) / 0xffff + 1;
}

// Returns the byte offset, from the start of the GOT, of the "page" entry
// used for B + Addend. Page entries follow the header entries; entries
// belonging to one output section are consecutive, starting at the index
// stored in PageIndexMap.
template <class ELFT>
typename MipsGotSection<ELFT>::uintX_t
MipsGotSection<ELFT>::getPageEntryOffset(const SymbolBody &B,
                                         uintX_t Addend) const {
  const OutputSectionBase *OutSec =
      cast<DefinedRegular<ELFT>>(&B)->Section->OutSec;
  uintX_t SecAddr = getMipsPageAddr(OutSec->Addr);
  uintX_t SymAddr = getMipsPageAddr(B.getVA<ELFT>(Addend));
  uintX_t Index = PageIndexMap.lookup(OutSec) + (SymAddr - SecAddr) / 0xffff;
  assert(Index < PageEntriesNum);
  return (HeaderEntriesNum + Index) * sizeof(uintX_t);
}

// Returns the byte offset, from the start of the GOT, of the entry that
// holds B + Addend. The blocks are laid out in this order: header, page
// entries, LocalEntries, LocalEntries32, GlobalEntries, TLS entries.
template <class ELFT>
typename MipsGotSection<ELFT>::uintX_t
MipsGotSection<ELFT>::getBodyEntryOffset(const SymbolBody &B,
                                         uintX_t Addend) const {
  // Calculate offset of the GOT entries block: TLS, global, local.
  uintX_t Index = HeaderEntriesNum + PageEntriesNum;
  if (B.isTls())
    Index += LocalEntries.size() + LocalEntries32.size() + GlobalEntries.size();
  else if (B.IsInGlobalMipsGot)
    Index += LocalEntries.size() + LocalEntries32.size();
  else if (B.Is32BitMipsGot)
    Index += LocalEntries.size();
  // Calculate offset of the GOT entry in the block.
  if (B.isInGot())
    Index += B.GotIndex;
  else {
    // No GotIndex was recorded for this symbol, so the (symbol, addend)
    // pair must be looked up in the map filled by addEntry().
    auto It = EntryIndexMap.find({&B, Addend});
    assert(It != EntryIndexMap.end());
    Index += It->second;
  }
  return Index * sizeof(uintX_t);
}

// Returns the byte offset of the first TLS entry from the start of the GOT.
template <class ELFT>
typename MipsGotSection<ELFT>::uintX_t
MipsGotSection<ELFT>::getTlsOffset() const {
  return (getLocalEntriesNum() + GlobalEntries.size()) * sizeof(uintX_t);
}

// Returns B.GlobalDynIndex scaled to bytes. The index counts TLS entries
// (see addDynTlsEntry), so the result is relative to the TLS part.
template <class ELFT>
typename MipsGotSection<ELFT>::uintX_t
MipsGotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const {
  return B.GlobalDynIndex * sizeof(uintX_t);
}

// Returns the symbol of the first entry in the "global" part of the GOT,
// or nullptr if that part is empty.
template <class ELFT>
const SymbolBody *MipsGotSection<ELFT>::getFirstGlobalEntry() const {
  return GlobalEntries.empty() ? nullptr : GlobalEntries.front().first;
}

// Returns the number of entries that precede the "global" part:
// header, page, 16-bit-indexed local and 32-bit-indexed local entries.
template <class ELFT>
unsigned MipsGotSection<ELFT>::getLocalEntriesNum() const {
  return HeaderEntriesNum + PageEntriesNum + LocalEntries.size() +
         LocalEntries32.size();
}

// Assigns page-entry indexes to the recorded output sections and computes
// the final section size.
template <class ELFT> void MipsGotSection<ELFT>::finalize() {
  PageEntriesNum = 0;
  for (std::pair<const OutputSectionBase *, size_t> &P : PageIndexMap) {
    // For each output section referenced by GOT page relocations calculate
    // and save into PageIndexMap an upper bound of MIPS GOT entries required
    // to store page addresses of local symbols. We assume the worst case -
    // each 64kb page of the output section has at least one GOT relocation
    // against it. And take into account the case when the section
    // intersects page boundaries.
    P.second = PageEntriesNum;
    PageEntriesNum += getMipsPageCount(P.first->Size);
  }
  Size = (getLocalEntriesNum() + GlobalEntries.size() + TlsEntries.size()) *
         sizeof(uintX_t);
}

template <class ELFT> bool MipsGotSection<ELFT>::empty() const {
  // We add the .got section to the result for dynamic MIPS target because
  // its address and properties are mentioned in the .dynamic section.
  return Config->Relocatable;
}

// Returns the address of the ElfSym<ELFT>::MipsGp symbol.
template <class ELFT>
typename MipsGotSection<ELFT>::uintX_t MipsGotSection<ELFT>::getGp() const {
  return ElfSym<ELFT>::MipsGp->template getVA<ELFT>(0);
}

// Stores Val at Buf as an integer of the target's word size and endianness.
template <class ELFT>
static void writeUint(uint8_t *Buf, typename ELFT::uint Val) {
  typedef typename ELFT::uint uintX_t;
  write<uintX_t, ELFT::TargetEndianness, sizeof(uintX_t)>(Buf, Val);
}

// Writes the GOT in the same block order that getBodyEntryOffset() assumes.
// Buf is advanced past each block as it is written.
template <class ELFT> void MipsGotSection<ELFT>::writeTo(uint8_t *Buf) {
  // Set the MSB of the second GOT slot. This is not required by any
  // MIPS ABI documentation, though.
  //
  // There is a comment in glibc saying that "The MSB of got[1] of a
  // gnu object is set to identify gnu objects," and in GNU gold it
  // says "the second entry will be used by some runtime loaders".
  // But how this field is being used is unclear.
  //
  // We are not really willing to mimic other linkers behaviors
  // without understanding why they do that, but because all files
  // generated by GNU tools have this special GOT value, and because
  // we've been doing this for years, it is probably a safe bet to
  // keep doing this for now. We really need to revisit this to see
  // if we had to do this.
  auto *P = reinterpret_cast<typename ELFT::Off *>(Buf);
  P[1] = uintX_t(1) << (ELFT::Is64Bits ? 63 : 31);
  Buf += HeaderEntriesNum * sizeof(uintX_t);
  // Write 'page address' entries to the local part of the GOT.
  // Consecutive entries of one output section differ by 0x10000.
  for (std::pair<const OutputSectionBase *, size_t> &L : PageIndexMap) {
    size_t PageCount = getMipsPageCount(L.first->Size);
    uintX_t FirstPageAddr = getMipsPageAddr(L.first->Addr);
    for (size_t PI = 0; PI < PageCount; ++PI) {
      uint8_t *Entry = Buf + (L.second + PI) * sizeof(uintX_t);
      writeUint<ELFT>(Entry, FirstPageAddr + PI * 0x10000);
    }
  }
  Buf += PageEntriesNum * sizeof(uintX_t);
  // Writes the address of one (symbol, addend) pair and advances Buf.
  auto AddEntry = [&](const GotEntry &SA) {
    uint8_t *Entry = Buf;
    Buf += sizeof(uintX_t);
    const SymbolBody *Body = SA.first;
    uintX_t VA = Body->template getVA<ELFT>(SA.second);
    writeUint<ELFT>(Entry, VA);
  };
  std::for_each(std::begin(LocalEntries), std::end(LocalEntries), AddEntry);
  std::for_each(std::begin(LocalEntries32), std::end(LocalEntries32), AddEntry);
  std::for_each(std::begin(GlobalEntries), std::end(GlobalEntries), AddEntry);
  // Initialize TLS-related GOT entries. If the entry has a corresponding
  // dynamic relocation, leave it initialized by zero. Otherwise write down
  // the adjusted TLS symbol's value. To calculate the adjustments use
  // offsets for thread-local storage.
  // https://www.linux-mips.org/wiki/NPTL
  if (TlsIndexOff != -1U && !Config->Pic)
    writeUint<ELFT>(Buf + TlsIndexOff, 1);
  for (const SymbolBody *B : TlsEntries) {
    // nullptr marks a placeholder slot (see addDynTlsEntry/addTlsIndex).
    if (!B || B->isPreemptible())
      continue;
    uintX_t VA = B->getVA<ELFT>();
    if (B->GotIndex != -1U) {
      uint8_t *Entry = Buf + B->GotIndex * sizeof(uintX_t);
      writeUint<ELFT>(Entry, VA - 0x7000);
    }
    if (B->GlobalDynIndex != -1U) {
      // Two consecutive slots: the constant 1, then the biased address.
      uint8_t *Entry = Buf + B->GlobalDynIndex * sizeof(uintX_t);
      writeUint<ELFT>(Entry, 1);
      Entry += sizeof(uintX_t);
      writeUint<ELFT>(Entry, VA - 0x8000);
    }
  }
}

template <class ELFT>
GotPltSection<ELFT>::GotPltSection()
    : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
                             Target->GotPltEntrySize, ".got.plt") {}

// Appends Sym. The recorded index counts the target's header entries too.
template <class ELFT> void GotPltSection<ELFT>::addEntry(SymbolBody &Sym) {
  Sym.GotPltIndex = Target->GotPltHeaderEntriesNum + Entries.size();
  Entries.push_back(&Sym);
}

template <class ELFT> size_t GotPltSection<ELFT>::getSize() const {
  return (Target->GotPltHeaderEntriesNum + Entries.size()) *
         Target->GotPltEntrySize;
}

// Writes the target-specific header followed by one slot per entry.
template <class ELFT> void GotPltSection<ELFT>::writeTo(uint8_t *Buf) {
  Target->writeGotPltHeader(Buf);
  Buf += Target->GotPltHeaderEntriesNum * Target->GotPltEntrySize;
  for (const SymbolBody *B : Entries) {
    Target->writeGotPlt(Buf, *B);
    // NOTE(review): the stride here is sizeof(uintX_t) while getSize() and
    // the header skip use Target->GotPltEntrySize; presumably the two are
    // always equal -- confirm.
    Buf += sizeof(uintX_t);
  }
}

// On ARM the IgotPltSection is part of the GotSection, on other Targets it is
// part of the .got.plt
template <class ELFT>
IgotPltSection<ELFT>::IgotPltSection()
    : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
                             Target->GotPltEntrySize,
                             Config->EMachine == EM_ARM ? ".got" : ".got.plt") {
}

// Appends Sym and flags it as living in this section. Unlike
// GotPltSection::addEntry, the index does not include any header entries.
template <class ELFT> void IgotPltSection<ELFT>::addEntry(SymbolBody &Sym) {
  Sym.IsInIgot = true;
  Sym.GotPltIndex = Entries.size();
  Entries.push_back(&Sym);
}

template <class ELFT> size_t IgotPltSection<ELFT>::getSize() const {
  return Entries.size() * Target->GotPltEntrySize;
}

template <class ELFT> void IgotPltSection<ELFT>::writeTo(uint8_t *Buf) {
  for (const SymbolBody *B : Entries) {
    Target->writeIgotPlt(Buf, *B);
    // NOTE(review): stride is sizeof(uintX_t) while getSize() uses
    // Target->GotPltEntrySize; presumably equal -- confirm.
    Buf += sizeof(uintX_t);
  }
}

// A dynamic string table is allocated (SHF_ALLOC); a non-dynamic one has
// no section flags.
template <class ELFT>
StringTableSection<ELFT>::StringTableSection(StringRef Name, bool Dynamic)
    : SyntheticSection<ELFT>(Dynamic ? (uintX_t)SHF_ALLOC : 0, SHT_STRTAB, 1,
                             Name),
      Dynamic(Dynamic) {}

// Adds a string to the string table. If HashIt is true we hash and check for
// duplicates. It is optional because the name of global symbols are already
// uniqued and hashing them again has a big cost for a small value: uniquing
// them with some other string that happens to be the same.
//
// Returns the offset of the string within the table.
template <class ELFT>
unsigned StringTableSection<ELFT>::addString(StringRef S, bool HashIt) {
  if (HashIt) {
    auto R = StringMap.insert(std::make_pair(S, this->Size));
    if (!R.second)
      return R.first->second;
  }
  unsigned Ret = this->Size;
  // +1 accounts for the NUL terminator that follows each string.
  this->Size = this->Size + S.size() + 1;
  Strings.push_back(S);
  return Ret;
}

template <class ELFT> void StringTableSection<ELFT>::writeTo(uint8_t *Buf) {
  // ELF string tables start with NUL byte, so advance the pointer by one.
  ++Buf;
  for (StringRef S : Strings) {
    memcpy(Buf, S.data(), S.size());
    // The terminator byte is skipped rather than written; presumably the
    // output buffer is zero-filled -- TODO confirm.
    Buf += S.size() + 1;
  }
}

// Returns the number of version definition entries. Because the first entry
// is for the version definition itself, it is the number of versioned symbols
// plus one. Note that we don't support multiple versions yet.
static unsigned getVerDefNum() { return Config->VersionDefinitions.size() + 1; }

// Sets up the .dynamic section and immediately adds the entries that do not
// depend on other sections (see addEntries()).
template <class ELFT>
DynamicSection<ELFT>::DynamicSection()
    : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC,
                             sizeof(uintX_t), ".dynamic") {
  this->Entsize = ELFT::Is64Bits ? 16 : 8;
  // .dynamic section is not writable on MIPS.
  // See "Special Section" in Chapter 4 in the following document:
  // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
  if (Config->EMachine == EM_MIPS)
    this->Flags = SHF_ALLOC;

  addEntries();
}

// There are some dynamic entries that don't depend on other sections.
// Such entries can be set early.
template <class ELFT> void DynamicSection<ELFT>::addEntries() {
  // Add strings to .dynstr early so that .dynstr's size will be
  // fixed early.
  for (StringRef S : Config->AuxiliaryList)
    add({DT_AUXILIARY, In<ELFT>::DynStrTab->addString(S)});
  if (!Config->RPath.empty())
    add({Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH,
         In<ELFT>::DynStrTab->addString(Config->RPath)});
  for (SharedFile<ELFT> *F : Symtab<ELFT>::X->getSharedFiles())
    if (F->isNeeded())
      add({DT_NEEDED, In<ELFT>::DynStrTab->addString(F->getSoName())});
  if (!Config->SoName.empty())
    add({DT_SONAME, In<ELFT>::DynStrTab->addString(Config->SoName)});

  // Set DT_FLAGS and DT_FLAGS_1.
  uint32_t DtFlags = 0;
  uint32_t DtFlags1 = 0;
  if (Config->Bsymbolic)
    DtFlags |= DF_SYMBOLIC;
  if (Config->ZNodelete)
    DtFlags1 |= DF_1_NODELETE;
  if (Config->ZNow) {
    DtFlags |= DF_BIND_NOW;
    DtFlags1 |= DF_1_NOW;
  }
  if (Config->ZOrigin) {
    DtFlags |= DF_ORIGIN;
    DtFlags1 |= DF_1_ORIGIN;
  }

  // Each flags entry is emitted only if at least one of its bits is set.
  if (DtFlags)
    add({DT_FLAGS, DtFlags});
  if (DtFlags1)
    add({DT_FLAGS_1, DtFlags1});

  // DT_DEBUG is added with a zero value for anything that is neither a
  // shared object nor a relocatable output.
  if (!Config->Shared && !Config->Relocatable)
    add({DT_DEBUG, (uint64_t)0});
}

// Add remaining entries to complete .dynamic contents.
template <class ELFT> void DynamicSection<ELFT>::finalize() {
  if (this->Size)
    return; // Already finalized.

  this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex;
  if (In<ELFT>::RelaDyn->OutSec->Size > 0) {
    bool IsRela = Config->Rela;
    add({IsRela ? DT_RELA : DT_REL, In<ELFT>::RelaDyn});
    add({IsRela ? DT_RELASZ : DT_RELSZ, In<ELFT>::RelaDyn->OutSec->Size});
    add({IsRela ? DT_RELAENT : DT_RELENT,
         uintX_t(IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel))});

    // MIPS dynamic loader does not support RELCOUNT tag.
    // The problem is in the tight relation between dynamic
    // relocations and GOT. So do not emit this tag on MIPS.
    if (Config->EMachine != EM_MIPS) {
      size_t NumRelativeRels = In<ELFT>::RelaDyn->getRelativeRelocCount();
      if (Config->ZCombreloc && NumRelativeRels)
        add({IsRela ? DT_RELACOUNT : DT_RELCOUNT, NumRelativeRels});
    }
  }
  if (In<ELFT>::RelaPlt->OutSec->Size > 0) {
    add({DT_JMPREL, In<ELFT>::RelaPlt});
    add({DT_PLTRELSZ, In<ELFT>::RelaPlt->OutSec->Size});
    // On MIPS, DT_PLTGOT is used for the MIPS GOT instead (added further
    // below), so .got.plt is published under DT_MIPS_PLTGOT.
    add({Config->EMachine == EM_MIPS ? DT_MIPS_PLTGOT : DT_PLTGOT,
         In<ELFT>::GotPlt});
    add({DT_PLTREL, uint64_t(Config->Rela ? DT_RELA : DT_REL)});
  }

  add({DT_SYMTAB, In<ELFT>::DynSymTab});
  add({DT_SYMENT, sizeof(Elf_Sym)});
  add({DT_STRTAB, In<ELFT>::DynStrTab});
  add({DT_STRSZ, In<ELFT>::DynStrTab->getSize()});
  if (In<ELFT>::GnuHashTab)
    add({DT_GNU_HASH, In<ELFT>::GnuHashTab});
  if (In<ELFT>::HashTab)
    add({DT_HASH, In<ELFT>::HashTab});

  // For each of the three arrays, one entry carries the output section's
  // address and a second one (Entry::SecSize) carries its size.
  if (Out<ELFT>::PreinitArray) {
    add({DT_PREINIT_ARRAY, Out<ELFT>::PreinitArray});
    add({DT_PREINIT_ARRAYSZ, Out<ELFT>::PreinitArray, Entry::SecSize});
  }
  if (Out<ELFT>::InitArray) {
    add({DT_INIT_ARRAY, Out<ELFT>::InitArray});
    add({DT_INIT_ARRAYSZ, Out<ELFT>::InitArray, Entry::SecSize});
  }
  if (Out<ELFT>::FiniArray) {
    add({DT_FINI_ARRAY, Out<ELFT>::FiniArray});
    add({DT_FINI_ARRAYSZ, Out<ELFT>::FiniArray, Entry::SecSize});
  }

  // DT_INIT/DT_FINI are only emitted if the configured symbol is found.
  if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Init))
    add({DT_INIT, B});
  if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Fini))
    add({DT_FINI, B});

  bool HasVerNeed = In<ELFT>::VerNeed->getNeedNum() != 0;
  if (HasVerNeed || In<ELFT>::VerDef)
    add({DT_VERSYM, In<ELFT>::VerSym});
  if (In<ELFT>::VerDef) {
    add({DT_VERDEF, In<ELFT>::VerDef});
    add({DT_VERDEFNUM, getVerDefNum()});
  }
  if (HasVerNeed) {
    add({DT_VERNEED, In<ELFT>::VerNeed});
    add({DT_VERNEEDNUM, In<ELFT>::VerNeed->getNeedNum()});
  }

  if (Config->EMachine == EM_MIPS) {
    add({DT_MIPS_RLD_VERSION, 1});
    add({DT_MIPS_FLAGS, RHF_NOTPOT});
    add({DT_MIPS_BASE_ADDRESS, Config->ImageBase});
    add({DT_MIPS_SYMTABNO, In<ELFT>::DynSymTab->getNumSymbols()});
    add({DT_MIPS_LOCAL_GOTNO, In<ELFT>::MipsGot->getLocalEntriesNum()});
    // With no global GOT entries, DT_MIPS_GOTSYM is set to the number of
    // dynamic symbols.
    if (const SymbolBody *B = In<ELFT>::MipsGot->getFirstGlobalEntry())
      add({DT_MIPS_GOTSYM, B->DynsymIndex});
    else
      add({DT_MIPS_GOTSYM, In<ELFT>::DynSymTab->getNumSymbols()});
    add({DT_PLTGOT, In<ELFT>::MipsGot});
    if (In<ELFT>::MipsRldMap)
      add({DT_MIPS_RLD_MAP, In<ELFT>::MipsRldMap});
  }

  this->OutSec->Entsize = this->Entsize;
  this->OutSec->Link = this->Link;

  // +1 for DT_NULL
  this->Size = (Entries.size() + 1) * this->Entsize;
}

// Writes one Elf_Dyn record per entry, resolving each value according to
// its kind. The trailing DT_NULL that finalize() reserves room for is not
// written here; presumably the output buffer is zero-filled -- TODO confirm.
template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *Buf) {
  auto *P = reinterpret_cast<Elf_Dyn *>(Buf);

  for (const Entry &E : Entries) {
    P->d_tag = E.Tag;
    switch (E.Kind) {
    case Entry::SecAddr:
      P->d_un.d_ptr = E.OutSec->Addr;
      break;
    case Entry::InSecAddr:
      P->d_un.d_ptr = E.InSec->OutSec->Addr + E.InSec->OutSecOff;
      break;
    case Entry::SecSize:
      P->d_un.d_val = E.OutSec->Size;
      break;
    case Entry::SymAddr:
      P->d_un.d_ptr = E.Sym->template getVA<ELFT>();
      break;
    case Entry::PlainInt:
      P->d_un.d_val = E.Val;
      break;
    }
    ++P;
  }
}

// Returns the address the relocation applies to. If OutputSec is set, the
// offset is relative to that output section; otherwise it is translated
// through the input section.
template <class ELFT>
typename ELFT::uint DynamicReloc<ELFT>::getOffset() const {
  if (OutputSec)
    return OutputSec->Addr + OffsetInSec;
  return InputSec->OutSec->Addr + InputSec->getOffset(OffsetInSec);
}

// Returns the addend to store: the symbol's address plus Addend when
// UseSymVA is set, the plain Addend otherwise.
template <class ELFT>
typename ELFT::uint DynamicReloc<ELFT>::getAddend() const {
  if (UseSymVA)
    return Sym->getVA<ELFT>(Addend);
  return Addend;
}

// Returns the dynamic symbol index to store, or 0 if there is no symbol or
// the symbol's address is already folded into the addend (UseSymVA).
template <class ELFT> uint32_t DynamicReloc<ELFT>::getSymIndex() const {
  if (Sym && !UseSymVA)
    return Sym->DynsymIndex;
  return 0;
}

// The section type and entry size follow Config->Rela. Sort selects
// whether writeTo() sorts the emitted relocations.
template <class ELFT>
RelocationSection<ELFT>::RelocationSection(StringRef Name, bool Sort)
    : SyntheticSection<ELFT>(SHF_ALLOC, Config->Rela ? SHT_RELA : SHT_REL,
                             sizeof(uintX_t), Name),
      Sort(Sort) {
  this->Entsize = Config->Rela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
}

// Appends Reloc and keeps a count of relocations whose type is the
// target's relative relocation.
template <class ELFT>
void RelocationSection<ELFT>::addReloc(const DynamicReloc<ELFT> &Reloc) {
  if (Reloc.Type == Target->RelativeRel)
    ++NumRelativeRelocs;
  Relocs.push_back(Reloc);
}

// Ordering used by RelocationSection::writeTo(): relative relocations come
// first; relocations of the same group are ordered by symbol index.
template <class ELFT, class RelTy>
static bool compRelocations(const RelTy &A, const RelTy &B) {
  bool AIsRel = A.getType(Config->Mips64EL) == Target->RelativeRel;
  bool BIsRel = B.getType(Config->Mips64EL) == Target->RelativeRel;
  if (AIsRel != BIsRel)
    return AIsRel;

  return A.getSymbol(Config->Mips64EL) < B.getSymbol(Config->Mips64EL);
}

// Serializes all relocations to Buf and then, if requested, sorts the
// serialized records in place.
template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) {
  uint8_t *BufBegin = Buf;
  for (const DynamicReloc<ELFT> &Rel : Relocs) {
    // The record is accessed through Elf_Rela in both modes, but r_addend
    // is only touched in RELA mode and the cursor advances by the size of
    // the record type actually in use.
    auto *P = reinterpret_cast<Elf_Rela *>(Buf);
    Buf += Config->Rela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);

    if (Config->Rela)
      P->r_addend = Rel.getAddend();
    P->r_offset = Rel.getOffset();
    if (Config->EMachine == EM_MIPS && Rel.getInputSec() == In<ELFT>::MipsGot)
      // Dynamic relocations against the MIPS GOT section deal with TLS
      // entries allocated at the end of the GOT. We need to adjust the
      // offset to take into account 'local' and 'global' GOT entries.
      P->r_offset += In<ELFT>::MipsGot->getTlsOffset();
    P->setSymbolAndType(Rel.getSymIndex(), Rel.Type, Config->Mips64EL);
  }

  if (Sort) {
    if (Config->Rela)
      std::stable_sort((Elf_Rela *)BufBegin,
                       (Elf_Rela *)BufBegin + Relocs.size(),
                       compRelocations<ELFT, Elf_Rela>);
    else
      std::stable_sort((Elf_Rel *)BufBegin, (Elf_Rel *)BufBegin + Relocs.size(),
                       compRelocations<ELFT, Elf_Rel>);
  }
}

// Returns the size in bytes of the relocations added so far.
template <class ELFT> unsigned RelocationSection<ELFT>::getRelocOffset() {
  return this->Entsize * Relocs.size();
}

// Links the section to .dynsym if it exists and to .symtab otherwise.
template <class ELFT> void RelocationSection<ELFT>::finalize() {
  this->Link = In<ELFT>::DynSymTab ? In<ELFT>::DynSymTab->OutSec->SectionIndex
                                   : In<ELFT>::SymTab->OutSec->SectionIndex;

  // Set required output section properties.
  this->OutSec->Link = this->Link;
  this->OutSec->Entsize = this->Entsize;
}

// Whether this is .dynsym or .symtab is derived from the string table the
// symbol names go into.
template <class ELFT>
SymbolTableSection<ELFT>::SymbolTableSection(
    StringTableSection<ELFT> &StrTabSec)
    : SyntheticSection<ELFT>(StrTabSec.isDynamic() ? (uintX_t)SHF_ALLOC : 0,
                             StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB,
                             sizeof(uintX_t),
                             StrTabSec.isDynamic() ? ".dynsym" : ".symtab"),
      StrTabSec(StrTabSec) {
  this->Entsize = sizeof(Elf_Sym);
}

// Orders symbols according to their positions in the GOT,
// in compliance with MIPS ABI rules.
// See "Global Offset Table" in Chapter 5 in the following document
// for detailed description:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
static bool sortMipsSymbols(const SymbolBody *L, const SymbolBody *R) {
  // Sort entries related to non-local preemptible symbols by GOT indexes.
  // All other entries go to the first part of GOT in arbitrary order.
  bool LIsInLocalGot = !L->IsInGlobalMipsGot;
  bool RIsInLocalGot = !R->IsInGlobalMipsGot;
  if (LIsInLocalGot || RIsInLocalGot)
    return !RIsInLocalGot;
  return L->GotIndex < R->GotIndex;
}

// Computes the binding to emit for a symbol. In relocatable output the
// original binding is kept. Otherwise symbols whose visibility is neither
// default nor protected become local, and STB_GNU_UNIQUE is replaced by
// STB_GLOBAL when Config->NoGnuUnique is set.
static uint8_t getSymbolBinding(SymbolBody *Body) {
  Symbol *S = Body->symbol();
  if (Config->Relocatable)
    return S->Binding;
  uint8_t Visibility = S->Visibility;
  if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED)
    return STB_LOCAL;
  if (Config->NoGnuUnique && S->Binding == STB_GNU_UNIQUE)
    return STB_GLOBAL;
  return S->Binding;
}

// Sets the section header fields and fixes the final order of the
// non-local symbols, assigning DynsymIndex where applicable.
template <class ELFT> void SymbolTableSection<ELFT>::finalize() {
  this->OutSec->Link = this->Link = StrTabSec.OutSec->SectionIndex;
  this->OutSec->Info = this->Info = NumLocals + 1;
  this->OutSec->Entsize = this->Entsize;

  if (Config->Relocatable) {
    // Indexes continue after the local symbols; the pre-increment makes
    // the first one NumLocals + 1.
    size_t I = NumLocals;
    for (const SymbolTableEntry &S : Symbols)
      S.Symbol->DynsymIndex = ++I;
    return;
  }

  if (!StrTabSec.isDynamic()) {
    // Move symbols that will be emitted as STB_LOCAL in front of the
    // others, keeping the relative order within each group.
    std::stable_sort(Symbols.begin(), Symbols.end(),
                     [](const SymbolTableEntry &L, const SymbolTableEntry &R) {
                       return getSymbolBinding(L.Symbol) == STB_LOCAL &&
                              getSymbolBinding(R.Symbol) != STB_LOCAL;
                     });
    return;
  }
  if (In<ELFT>::GnuHashTab)
    // NB: It also sorts Symbols to meet the GNU hash table requirements.
    In<ELFT>::GnuHashTab->addSymbols(Symbols);
  else if (Config->EMachine == EM_MIPS)
    std::stable_sort(Symbols.begin(), Symbols.end(),
                     [](const SymbolTableEntry &L, const SymbolTableEntry &R) {
                       return sortMipsSymbols(L.Symbol, R.Symbol);
                     });
  // Dynamic symbol indexes start at 1.
  size_t I = 0;
  for (const SymbolTableEntry &S : Symbols)
    S.Symbol->DynsymIndex = ++I;
}

// Records B together with the offset of its name in the string table.
// The name is added without de-duplication (HashIt == false).
template <class ELFT> void SymbolTableSection<ELFT>::addSymbol(SymbolBody *B) {
  Symbols.push_back({B, StrTabSec.addString(B->getName(), false)});
}

template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) {
  // Skip the first entry of the table; nothing is written to it here.
  Buf += sizeof(Elf_Sym);

  // All symbols with STB_LOCAL binding precede the weak and global symbols.
  // .dynsym only contains global symbols.
  if (Config->Discard != DiscardPolicy::All && !StrTabSec.isDynamic())
    writeLocalSymbols(Buf);

  writeGlobalSymbols(Buf);
}

// Buf is taken by reference: on return it points just past the last local
// symbol written, which is where writeGlobalSymbols() continues.
template <class ELFT>
void SymbolTableSection<ELFT>::writeLocalSymbols(uint8_t *&Buf) {
  // Iterate over all input object files to copy their local symbols
  // to the output symbol table pointed by Buf.
  for (ObjectFile<ELFT> *File : Symtab<ELFT>::X->getObjectFiles()) {
    for (const std::pair<const DefinedRegular<ELFT> *, size_t> &P :
         File->KeptLocalSyms) {
      const DefinedRegular<ELFT> &Body = *P.first;
      InputSectionBase<ELFT> *Section = Body.Section;
      auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);

      if (!Section) {
        // A symbol without a section is emitted as absolute.
        ESym->st_shndx = SHN_ABS;
        ESym->st_value = Body.Value;
      } else {
        const OutputSectionBase *OutSec = Section->OutSec;
        ESym->st_shndx = OutSec->SectionIndex;
        ESym->st_value = OutSec->Addr + Section->getOffset(Body);
      }
      // P.second is stored as the symbol's st_name.
      ESym->st_name = P.second;
      ESym->st_size = Body.template getSize<ELFT>();
      ESym->setBindingAndType(STB_LOCAL, Body.Type);
      Buf += sizeof(*ESym);
    }
  }
}

template <class ELFT>
void SymbolTableSection<ELFT>::writeGlobalSymbols(uint8_t *Buf) {
  // Write the internal symbol table contents to the output symbol table
  // pointed by Buf.
  auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
  for (const SymbolTableEntry &S : Symbols) {
    SymbolBody *Body = S.Symbol;
    size_t StrOff = S.StrTabOffset;

    uint8_t Type = Body->Type;
    uintX_t Size = Body->getSize<ELFT>();

    ESym->setBindingAndType(getSymbolBinding(Body), Type);
    ESym->st_size = Size;
    ESym->st_name = StrOff;
    ESym->setVisibility(Body->symbol()->Visibility);
    ESym->st_value = Body->getVA<ELFT>();

    // st_shndx is left untouched for symbols that have no output section
    // and are not DefinedRegular.
    if (const OutputSectionBase *OutSec = getOutputSection(Body))
      ESym->st_shndx = OutSec->SectionIndex;
    else if (isa<DefinedRegular<ELFT>>(Body))
      ESym->st_shndx = SHN_ABS;

    if (Config->EMachine == EM_MIPS) {
      // On MIPS we need to mark a symbol which has a PLT entry and requires
      // pointer equality with the STO_MIPS_PLT flag. That is necessary to
      // help the dynamic linker distinguish such symbols from MIPS
      // lazy-binding stubs.
      // https://sourceware.org/ml/binutils/2008-07/txt00000.txt
      if (Body->isInPlt() && Body->NeedsCopyOrPltAddr)
        ESym->st_other |= STO_MIPS_PLT;
      if (Config->Relocatable) {
        auto *D = dyn_cast<DefinedRegular<ELFT>>(Body);
        if (D && D->isMipsPIC())
          ESym->st_other |= STO_MIPS_PIC;
      }
    }
    ++ESym;
  }
}

// Returns the output section a symbol is placed in, or nullptr if there is
// none (e.g. undefined or lazy symbols, regular symbols without a section,
// and shared symbols that do not need a copy).
template <class ELFT>
const OutputSectionBase *
SymbolTableSection<ELFT>::getOutputSection(SymbolBody *Sym) {
  switch (Sym->kind()) {
  case SymbolBody::DefinedSyntheticKind:
    return cast<DefinedSynthetic>(Sym)->Section;
  case SymbolBody::DefinedRegularKind: {
    auto &D = cast<DefinedRegular<ELFT>>(*Sym);
    if (D.Section)
      return D.Section->OutSec;
    break;
  }
  case SymbolBody::DefinedCommonKind:
    return In<ELFT>::Common->OutSec;
  case SymbolBody::SharedKind:
    if (cast<SharedSymbol<ELFT>>(Sym)->needsCopy())
      return Out<ELFT>::Bss;
    break;
  case SymbolBody::UndefinedKind:
  case SymbolBody::LazyArchiveKind:
  case SymbolBody::LazyObjectKind:
    break;
  }
  return nullptr;
}

template <class ELFT>
GnuHashTableSection<ELFT>::GnuHashTableSection()
    : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_HASH, sizeof(uintX_t),
                             ".gnu.hash") {
  this->Entsize = ELFT::Is64Bits ? 0 : 4;
}

// Picks the number of hash buckets for NumHashed symbols from a fixed table
// indexed by ceil(log2(NumHashed)), clamped to the last table element.
// Returns 0 if there is nothing to hash.
template <class ELFT>
unsigned GnuHashTableSection<ELFT>::calcNBuckets(unsigned NumHashed) {
  if (!NumHashed)
    return 0;

  // These values are prime numbers which are not greater than 2^(N-1) + 1.
  // In result, for any particular NumHashed we return a prime number
  // which is not greater than NumHashed.
  static const unsigned Primes[] = {
      1,   1,    3,    3,    7,    13,    31,    61,    127,   251,
      509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071};

  return Primes[std::min<unsigned>(Log2_32_Ceil(NumHashed),
                                   array_lengthof(Primes) - 1)];
}

// Bloom filter estimation: at least 8 bits for each hashed symbol.
// GNU Hash table requirement: it should be a power of 2,
// the minimum value is 1, even for an empty table.
// Expected results for a 32-bit target:
//   calcMaskWords(0..4)   = 1
//   calcMaskWords(5..8)   = 2
//   calcMaskWords(9..16)  = 4
// For a 64-bit target:
//   calcMaskWords(0..8)   = 1
//   calcMaskWords(9..16)  = 2
//   calcMaskWords(17..32) = 4
template <class ELFT>
unsigned GnuHashTableSection<ELFT>::calcMaskWords(unsigned NumHashed) {
  if (!NumHashed)
    return 1;
  // sizeof(Elf_Off) is the size in bytes of one filter word.
  return NextPowerOf2((NumHashed - 1) / sizeof(Elf_Off));
}

// Fixes the table parameters from the number of hashed symbols and
// computes the section size.
template <class ELFT> void GnuHashTableSection<ELFT>::finalize() {
  unsigned NumHashed = Symbols.size();
  NBuckets = calcNBuckets(NumHashed);
  MaskWords = calcMaskWords(NumHashed);
  // Second hash shift estimation: just predefined values.
  Shift2 = ELFT::Is64Bits ? 6 : 5;

  this->OutSec->Entsize = this->Entsize;
  this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
  this->Size = sizeof(Elf_Word) * 4          // Header
               + sizeof(Elf_Off) * MaskWords // Bloom Filter
               + sizeof(Elf_Word) * NBuckets // Hash Buckets
               + sizeof(Elf_Word) * NumHashed; // Hash Values
}

// Writes the header and, unless there are no hashed symbols, the Bloom
// filter followed by the buckets and hash values. writeHeader() and
// writeBloomFilter() advance Buf past what they wrote.
template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) {
  writeHeader(Buf);
  if (Symbols.empty())
    return;
  writeBloomFilter(Buf);
  writeHashTable(Buf);
}

// Writes the four header words and advances Buf past them.
template <class ELFT>
void GnuHashTableSection<ELFT>::writeHeader(uint8_t *&Buf) {
  auto *P = reinterpret_cast<Elf_Word *>(Buf);
  *P++ = NBuckets;
  // Number of dynamic symbols that are not covered by this table.
  *P++ = In<ELFT>::DynSymTab->getNumSymbols() - Symbols.size();
  *P++ = MaskWords;
  *P++ = Shift2;
  Buf = reinterpret_cast<uint8_t *>(P);
}

// Sets two bits per symbol in the Bloom filter and advances Buf past the
// filter words. Existing word contents are OR-ed into, not overwritten.
template <class ELFT>
void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *&Buf) {
  // Number of bits in one filter word.
  unsigned C = sizeof(Elf_Off) * 8;

  auto *Masks = reinterpret_cast<Elf_Off *>(Buf);
  for (const SymbolData &Sym : Symbols) {
    // MaskWords is a power of two, so the AND selects the filter word.
    size_t Pos = (Sym.Hash / C) & (MaskWords - 1);
    uintX_t V = (uintX_t(1) << (Sym.Hash % C)) |
                (uintX_t(1) << ((Sym.Hash >> Shift2) % C));
    Masks[Pos] |= V;
  }
  Buf += sizeof(Elf_Off) * MaskWords;
}

// Writes the bucket array followed by the hash value array. Symbols must
// already be ordered by bucket (see addSymbols()).
template <class ELFT>
void GnuHashTableSection<ELFT>::writeHashTable(uint8_t *Buf) {
  Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf);
  Elf_Word *Values = Buckets + NBuckets;

  int PrevBucket = -1;
  int I = 0;
  for (const SymbolData &Sym : Symbols) {
    int Bucket = Sym.Hash % NBuckets;
    assert(PrevBucket <= Bucket);
    if (Bucket != PrevBucket) {
      // First symbol of a new bucket: record where the bucket starts and
      // set the low bit of the previous value to end the previous chain.
      Buckets[Bucket] = Sym.Body->DynsymIndex;
      PrevBucket = Bucket;
      if (I > 0)
        Values[I - 1] |= 1;
    }
    // The low bit is reserved as the end-of-chain marker.
    Values[I] = Sym.Hash & ~1;
    ++I;
  }
  // Terminate the last chain.
  if (I > 0)
    Values[I - 1] |= 1;
}

// Computes H = H * 33 + C over the bytes of Name, starting from 5381.
static uint32_t hashGnu(StringRef Name) {
  uint32_t H = 5381;
  for (uint8_t C : Name)
    H = (H << 5) + H + C;
  return H;
}

// Add symbols to this symbol hash table. Note that this function
// destructively sorts a given vector -- which is needed because
// GNU-style hash table places some sorting requirements.
template <class ELFT>
void GnuHashTableSection<ELFT>::addSymbols(std::vector<SymbolTableEntry> &V) {
  // Ideally this will just be 'auto' but GCC 6.1 is not able
  // to deduce it correctly.
  // Undefined symbols are moved to the front and are not hashed.
  std::vector<SymbolTableEntry>::iterator Mid =
      std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) {
        return S.Symbol->isUndefined();
      });
  if (Mid == V.end())
    return;
  for (auto I = Mid, E = V.end(); I != E; ++I) {
    SymbolBody *B = I->Symbol;
    size_t StrOff = I->StrTabOffset;
    Symbols.push_back({B, StrOff, hashGnu(B->getName())});
  }

  // Group the hashed symbols by bucket. This local NBuckets is computed
  // with the same function that finalize() uses later.
  unsigned NBuckets = calcNBuckets(Symbols.size());
  std::stable_sort(Symbols.begin(), Symbols.end(),
                   [&](const SymbolData &L, const SymbolData &R) {
                     return L.Hash % NBuckets < R.Hash % NBuckets;
                   });

  // Replace the tail of V with the hashed symbols in their new order.
  V.erase(Mid, V.end());
  for (const SymbolData &Sym : Symbols)
    V.push_back({Sym.Body, Sym.STName});
}

template <class ELFT>
HashTableSection<ELFT>::HashTableSection()
    : SyntheticSection<ELFT>(SHF_ALLOC, SHT_HASH, sizeof(Elf_Word), ".hash") {
  this->Entsize = sizeof(Elf_Word);
}

template <class ELFT> void HashTableSection<ELFT>::finalize() {
  this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
  this->OutSec->Entsize = this->Entsize;

  unsigned NumEntries = 2;                            // nbucket and nchain.
  NumEntries += In<ELFT>::DynSymTab->getNumSymbols(); // The chain entries.

  // Create as many buckets as there are symbols.
  // FIXME: This is simplistic. We can try to optimize it, but implementing
  // support for SHT_GNU_HASH is probably even more profitable.
+ NumEntries += In<ELFT>::DynSymTab->getNumSymbols(); + this->Size = NumEntries * sizeof(Elf_Word); +} + +template <class ELFT> void HashTableSection<ELFT>::writeTo(uint8_t *Buf) { + unsigned NumSymbols = In<ELFT>::DynSymTab->getNumSymbols(); + auto *P = reinterpret_cast<Elf_Word *>(Buf); + *P++ = NumSymbols; // nbucket + *P++ = NumSymbols; // nchain + + Elf_Word *Buckets = P; + Elf_Word *Chains = P + NumSymbols; + + for (const SymbolTableEntry &S : In<ELFT>::DynSymTab->getSymbols()) { + SymbolBody *Body = S.Symbol; + StringRef Name = Body->getName(); + unsigned I = Body->DynsymIndex; + uint32_t Hash = hashSysV(Name) % NumSymbols; + Chains[I] = Buckets[Hash]; + Buckets[Hash] = I; + } +} + +template <class ELFT> +PltSection<ELFT>::PltSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, + ".plt") {} + +template <class ELFT> void PltSection<ELFT>::writeTo(uint8_t *Buf) { + // At beginning of PLT, we have code to call the dynamic linker + // to resolve dynsyms at runtime. Write such code. 
+ Target->writePltHeader(Buf); + size_t Off = Target->PltHeaderSize; + + for (auto &I : Entries) { + const SymbolBody *B = I.first; + unsigned RelOff = I.second; + uint64_t Got = B->getGotPltVA<ELFT>(); + uint64_t Plt = this->getVA() + Off; + Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff); + Off += Target->PltEntrySize; + } +} + +template <class ELFT> void PltSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.PltIndex = Entries.size(); + unsigned RelOff = In<ELFT>::RelaPlt->getRelocOffset(); + Entries.push_back(std::make_pair(&Sym, RelOff)); +} + +template <class ELFT> size_t PltSection<ELFT>::getSize() const { + return Target->PltHeaderSize + Entries.size() * Target->PltEntrySize; +} + +template <class ELFT> +IpltSection<ELFT>::IpltSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, + ".plt") {} + +template <class ELFT> void IpltSection<ELFT>::writeTo(uint8_t *Buf) { + // The IRelative relocations do not support lazy binding so no header is + // needed + size_t Off = 0; + for (auto &I : Entries) { + const SymbolBody *B = I.first; + unsigned RelOff = I.second + In<ELFT>::Plt->getSize(); + uint64_t Got = B->getGotPltVA<ELFT>(); + uint64_t Plt = this->getVA() + Off; + Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff); + Off += Target->PltEntrySize; + } +} + +template <class ELFT> void IpltSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.PltIndex = Entries.size(); + Sym.IsInIplt = true; + unsigned RelOff = In<ELFT>::RelaIplt->getRelocOffset(); + Entries.push_back(std::make_pair(&Sym, RelOff)); +} + +template <class ELFT> size_t IpltSection<ELFT>::getSize() const { + return Entries.size() * Target->PltEntrySize; +} + +template <class ELFT> +GdbIndexSection<ELFT>::GdbIndexSection() + : SyntheticSection<ELFT>(0, SHT_PROGBITS, 1, ".gdb_index"), + StringPool(llvm::StringTableBuilder::ELF) {} + +template <class ELFT> void GdbIndexSection<ELFT>::parseDebugSections() { + for (InputSectionBase<ELFT> *S : 
Symtab<ELFT>::X->Sections) + if (InputSection<ELFT> *IS = dyn_cast<InputSection<ELFT>>(S)) + if (IS->OutSec && IS->Name == ".debug_info") + readDwarf(IS); +} + +// Iterative hash function for symbol's name is described in .gdb_index format +// specification. Note that we use one for version 5 to 7 here, it is different +// for version 4. +static uint32_t hash(StringRef Str) { + uint32_t R = 0; + for (uint8_t C : Str) + R = R * 67 + tolower(C) - 113; + return R; +} + +template <class ELFT> +void GdbIndexSection<ELFT>::readDwarf(InputSection<ELFT> *I) { + GdbIndexBuilder<ELFT> Builder(I); + if (ErrorCount) + return; + + size_t CuId = CompilationUnits.size(); + std::vector<std::pair<uintX_t, uintX_t>> CuList = Builder.readCUList(); + CompilationUnits.insert(CompilationUnits.end(), CuList.begin(), CuList.end()); + + std::vector<AddressEntry<ELFT>> AddrArea = Builder.readAddressArea(CuId); + AddressArea.insert(AddressArea.end(), AddrArea.begin(), AddrArea.end()); + + std::vector<std::pair<StringRef, uint8_t>> NamesAndTypes = + Builder.readPubNamesAndTypes(); + + for (std::pair<StringRef, uint8_t> &Pair : NamesAndTypes) { + uint32_t Hash = hash(Pair.first); + size_t Offset = StringPool.add(Pair.first); + + bool IsNew; + GdbSymbol *Sym; + std::tie(IsNew, Sym) = SymbolTable.add(Hash, Offset); + if (IsNew) { + Sym->CuVectorIndex = CuVectors.size(); + CuVectors.push_back({{CuId, Pair.second}}); + continue; + } + + std::vector<std::pair<uint32_t, uint8_t>> &CuVec = + CuVectors[Sym->CuVectorIndex]; + CuVec.push_back({CuId, Pair.second}); + } +} + +template <class ELFT> void GdbIndexSection<ELFT>::finalize() { + if (Finalized) + return; + Finalized = true; + + parseDebugSections(); + + // GdbIndex header consist from version fields + // and 5 more fields with different kinds of offsets. 
+ CuTypesOffset = CuListOffset + CompilationUnits.size() * CompilationUnitSize; + SymTabOffset = CuTypesOffset + AddressArea.size() * AddressEntrySize; + + ConstantPoolOffset = + SymTabOffset + SymbolTable.getCapacity() * SymTabEntrySize; + + for (std::vector<std::pair<uint32_t, uint8_t>> &CuVec : CuVectors) { + CuVectorsOffset.push_back(CuVectorsSize); + CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1); + } + StringPoolOffset = ConstantPoolOffset + CuVectorsSize; + + StringPool.finalizeInOrder(); +} + +template <class ELFT> size_t GdbIndexSection<ELFT>::getSize() const { + const_cast<GdbIndexSection<ELFT> *>(this)->finalize(); + return StringPoolOffset + StringPool.getSize(); +} + +template <class ELFT> void GdbIndexSection<ELFT>::writeTo(uint8_t *Buf) { + write32le(Buf, 7); // Write version. + write32le(Buf + 4, CuListOffset); // CU list offset. + write32le(Buf + 8, CuTypesOffset); // Types CU list offset. + write32le(Buf + 12, CuTypesOffset); // Address area offset. + write32le(Buf + 16, SymTabOffset); // Symbol table offset. + write32le(Buf + 20, ConstantPoolOffset); // Constant pool offset. + Buf += 24; + + // Write the CU list. + for (std::pair<uintX_t, uintX_t> CU : CompilationUnits) { + write64le(Buf, CU.first); + write64le(Buf + 8, CU.second); + Buf += 16; + } + + // Write the address area. + for (AddressEntry<ELFT> &E : AddressArea) { + uintX_t BaseAddr = E.Section->OutSec->Addr + E.Section->getOffset(0); + write64le(Buf, BaseAddr + E.LowAddress); + write64le(Buf + 8, BaseAddr + E.HighAddress); + write32le(Buf + 16, E.CuIndex); + Buf += 20; + } + + // Write the symbol table. 
+ for (size_t I = 0; I < SymbolTable.getCapacity(); ++I) { + GdbSymbol *Sym = SymbolTable.getSymbol(I); + if (Sym) { + size_t NameOffset = + Sym->NameOffset + StringPoolOffset - ConstantPoolOffset; + size_t CuVectorOffset = CuVectorsOffset[Sym->CuVectorIndex]; + write32le(Buf, NameOffset); + write32le(Buf + 4, CuVectorOffset); + } + Buf += 8; + } + + // Write the CU vectors into the constant pool. + for (std::vector<std::pair<uint32_t, uint8_t>> &CuVec : CuVectors) { + write32le(Buf, CuVec.size()); + Buf += 4; + for (std::pair<uint32_t, uint8_t> &P : CuVec) { + uint32_t Index = P.first; + uint8_t Flags = P.second; + Index |= Flags << 24; + write32le(Buf, Index); + Buf += 4; + } + } + + StringPool.write(Buf); +} + +template <class ELFT> bool GdbIndexSection<ELFT>::empty() const { + return !Out<ELFT>::DebugInfo; +} + +template <class ELFT> +EhFrameHeader<ELFT>::EhFrameHeader() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame_hdr") {} + +// .eh_frame_hdr contains a binary search table of pointers to FDEs. +// Each entry of the search table consists of two values, +// the starting PC from where FDEs covers, and the FDE's address. +// It is sorted by PC. +template <class ELFT> void EhFrameHeader<ELFT>::writeTo(uint8_t *Buf) { + const endianness E = ELFT::TargetEndianness; + + // Sort the FDE list by their PC and uniqueify. Usually there is only + // one FDE for a PC (i.e. function), but if ICF merges two functions + // into one, there can be more than one FDEs pointing to the address. 
+ auto Less = [](const FdeData &A, const FdeData &B) { return A.Pc < B.Pc; }; + std::stable_sort(Fdes.begin(), Fdes.end(), Less); + auto Eq = [](const FdeData &A, const FdeData &B) { return A.Pc == B.Pc; }; + Fdes.erase(std::unique(Fdes.begin(), Fdes.end(), Eq), Fdes.end()); + + Buf[0] = 1; + Buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; + Buf[2] = DW_EH_PE_udata4; + Buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; + write32<E>(Buf + 4, Out<ELFT>::EhFrame->Addr - this->getVA() - 4); + write32<E>(Buf + 8, Fdes.size()); + Buf += 12; + + uintX_t VA = this->getVA(); + for (FdeData &Fde : Fdes) { + write32<E>(Buf, Fde.Pc - VA); + write32<E>(Buf + 4, Fde.FdeVA - VA); + Buf += 8; + } +} + +template <class ELFT> size_t EhFrameHeader<ELFT>::getSize() const { + // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. + return 12 + Out<ELFT>::EhFrame->NumFdes * 8; +} + +template <class ELFT> +void EhFrameHeader<ELFT>::addFde(uint32_t Pc, uint32_t FdeVA) { + Fdes.push_back({Pc, FdeVA}); +} + +template <class ELFT> bool EhFrameHeader<ELFT>::empty() const { + return Out<ELFT>::EhFrame->empty(); +} + +template <class ELFT> +VersionDefinitionSection<ELFT>::VersionDefinitionSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t), + ".gnu.version_d") {} + +static StringRef getFileDefName() { + if (!Config->SoName.empty()) + return Config->SoName; + return Config->OutputFile; +} + +template <class ELFT> void VersionDefinitionSection<ELFT>::finalize() { + FileDefNameOff = In<ELFT>::DynStrTab->addString(getFileDefName()); + for (VersionDefinition &V : Config->VersionDefinitions) + V.NameOff = In<ELFT>::DynStrTab->addString(V.Name); + + this->OutSec->Link = this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; + + // sh_info should be set to the number of definitions. 
This fact is missed in + // documentation, but confirmed by binutils community: + // https://sourceware.org/ml/binutils/2014-11/msg00355.html + this->OutSec->Info = this->Info = getVerDefNum(); +} + +template <class ELFT> +void VersionDefinitionSection<ELFT>::writeOne(uint8_t *Buf, uint32_t Index, + StringRef Name, size_t NameOff) { + auto *Verdef = reinterpret_cast<Elf_Verdef *>(Buf); + Verdef->vd_version = 1; + Verdef->vd_cnt = 1; + Verdef->vd_aux = sizeof(Elf_Verdef); + Verdef->vd_next = sizeof(Elf_Verdef) + sizeof(Elf_Verdaux); + Verdef->vd_flags = (Index == 1 ? VER_FLG_BASE : 0); + Verdef->vd_ndx = Index; + Verdef->vd_hash = hashSysV(Name); + + auto *Verdaux = reinterpret_cast<Elf_Verdaux *>(Buf + sizeof(Elf_Verdef)); + Verdaux->vda_name = NameOff; + Verdaux->vda_next = 0; +} + +template <class ELFT> +void VersionDefinitionSection<ELFT>::writeTo(uint8_t *Buf) { + writeOne(Buf, 1, getFileDefName(), FileDefNameOff); + + for (VersionDefinition &V : Config->VersionDefinitions) { + Buf += sizeof(Elf_Verdef) + sizeof(Elf_Verdaux); + writeOne(Buf, V.Id, V.Name, V.NameOff); + } + + // Need to terminate the last version definition. + Elf_Verdef *Verdef = reinterpret_cast<Elf_Verdef *>(Buf); + Verdef->vd_next = 0; +} + +template <class ELFT> size_t VersionDefinitionSection<ELFT>::getSize() const { + return (sizeof(Elf_Verdef) + sizeof(Elf_Verdaux)) * getVerDefNum(); +} + +template <class ELFT> +VersionTableSection<ELFT>::VersionTableSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t), + ".gnu.version") {} + +template <class ELFT> void VersionTableSection<ELFT>::finalize() { + this->OutSec->Entsize = this->Entsize = sizeof(Elf_Versym); + // At the moment of june 2016 GNU docs does not mention that sh_link field + // should be set, but Sun docs do. Also readelf relies on this field. 
+ this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; +} + +template <class ELFT> size_t VersionTableSection<ELFT>::getSize() const { + return sizeof(Elf_Versym) * (In<ELFT>::DynSymTab->getSymbols().size() + 1); +} + +template <class ELFT> void VersionTableSection<ELFT>::writeTo(uint8_t *Buf) { + auto *OutVersym = reinterpret_cast<Elf_Versym *>(Buf) + 1; + for (const SymbolTableEntry &S : In<ELFT>::DynSymTab->getSymbols()) { + OutVersym->vs_index = S.Symbol->symbol()->VersionId; + ++OutVersym; + } +} + +template <class ELFT> bool VersionTableSection<ELFT>::empty() const { + return !In<ELFT>::VerDef && In<ELFT>::VerNeed->empty(); +} + +template <class ELFT> +VersionNeedSection<ELFT>::VersionNeedSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t), + ".gnu.version_r") { + // Identifiers in verneed section start at 2 because 0 and 1 are reserved + // for VER_NDX_LOCAL and VER_NDX_GLOBAL. + // First identifiers are reserved by verdef section if it exist. + NextIndex = getVerDefNum() + 1; +} + +template <class ELFT> +void VersionNeedSection<ELFT>::addSymbol(SharedSymbol<ELFT> *SS) { + if (!SS->Verdef) { + SS->symbol()->VersionId = VER_NDX_GLOBAL; + return; + } + SharedFile<ELFT> *F = SS->file(); + // If we don't already know that we need an Elf_Verneed for this DSO, prepare + // to create one by adding it to our needed list and creating a dynstr entry + // for the soname. + if (F->VerdefMap.empty()) + Needed.push_back({F, In<ELFT>::DynStrTab->addString(F->getSoName())}); + typename SharedFile<ELFT>::NeededVer &NV = F->VerdefMap[SS->Verdef]; + // If we don't already know that we need an Elf_Vernaux for this Elf_Verdef, + // prepare to create one by allocating a version identifier and creating a + // dynstr entry for the version name. 
+ if (NV.Index == 0) { + NV.StrTab = In<ELFT>::DynStrTab->addString( + SS->file()->getStringTable().data() + SS->Verdef->getAux()->vda_name); + NV.Index = NextIndex++; + } + SS->symbol()->VersionId = NV.Index; +} + +template <class ELFT> void VersionNeedSection<ELFT>::writeTo(uint8_t *Buf) { + // The Elf_Verneeds need to appear first, followed by the Elf_Vernauxs. + auto *Verneed = reinterpret_cast<Elf_Verneed *>(Buf); + auto *Vernaux = reinterpret_cast<Elf_Vernaux *>(Verneed + Needed.size()); + + for (std::pair<SharedFile<ELFT> *, size_t> &P : Needed) { + // Create an Elf_Verneed for this DSO. + Verneed->vn_version = 1; + Verneed->vn_cnt = P.first->VerdefMap.size(); + Verneed->vn_file = P.second; + Verneed->vn_aux = + reinterpret_cast<char *>(Vernaux) - reinterpret_cast<char *>(Verneed); + Verneed->vn_next = sizeof(Elf_Verneed); + ++Verneed; + + // Create the Elf_Vernauxs for this Elf_Verneed. The loop iterates over + // VerdefMap, which will only contain references to needed version + // definitions. Each Elf_Vernaux is based on the information contained in + // the Elf_Verdef in the source DSO. This loop iterates over a std::map of + // pointers, but is deterministic because the pointers refer to Elf_Verdef + // data structures within a single input file. 
+ for (auto &NV : P.first->VerdefMap) { + Vernaux->vna_hash = NV.first->vd_hash; + Vernaux->vna_flags = 0; + Vernaux->vna_other = NV.second.Index; + Vernaux->vna_name = NV.second.StrTab; + Vernaux->vna_next = sizeof(Elf_Vernaux); + ++Vernaux; + } + + Vernaux[-1].vna_next = 0; + } + Verneed[-1].vn_next = 0; +} + +template <class ELFT> void VersionNeedSection<ELFT>::finalize() { + this->OutSec->Link = this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; + this->OutSec->Info = this->Info = Needed.size(); +} + +template <class ELFT> size_t VersionNeedSection<ELFT>::getSize() const { + unsigned Size = Needed.size() * sizeof(Elf_Verneed); + for (const std::pair<SharedFile<ELFT> *, size_t> &P : Needed) + Size += P.first->VerdefMap.size() * sizeof(Elf_Vernaux); + return Size; +} + +template <class ELFT> bool VersionNeedSection<ELFT>::empty() const { + return getNeedNum() == 0; +} + +template <class ELFT> +MipsRldMapSection<ELFT>::MipsRldMapSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + sizeof(typename ELFT::uint), ".rld_map") {} + +template <class ELFT> void MipsRldMapSection<ELFT>::writeTo(uint8_t *Buf) { + // Apply filler from linker script. + uint64_t Filler = Script<ELFT>::X->getFiller(this->Name); + Filler = (Filler << 32) | Filler; + memcpy(Buf, &Filler, getSize()); +} + +template <class ELFT> +ARMExidxSentinelSection<ELFT>::ARMExidxSentinelSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX, + sizeof(typename ELFT::uint), ".ARM.exidx") {} + +// Write a terminating sentinel entry to the end of the .ARM.exidx table. +// This section will have been sorted last in the .ARM.exidx table. 
+// This table entry will have the form: +// | PREL31 upper bound of code that has exception tables | EXIDX_CANTUNWIND | +template <class ELFT> +void ARMExidxSentinelSection<ELFT>::writeTo(uint8_t *Buf) { + // Get the InputSection before us, we are by definition last + auto RI = cast<OutputSection<ELFT>>(this->OutSec)->Sections.rbegin(); + InputSection<ELFT> *LE = *(++RI); + InputSection<ELFT> *LC = cast<InputSection<ELFT>>(LE->getLinkOrderDep()); + uint64_t S = LC->OutSec->Addr + LC->getOffset(LC->getSize()); + uint64_t P = this->getVA(); + Target->relocateOne(Buf, R_ARM_PREL31, S - P); + write32le(Buf + 4, 0x1); +} + +template InputSection<ELF32LE> *elf::createCommonSection(); +template InputSection<ELF32BE> *elf::createCommonSection(); +template InputSection<ELF64LE> *elf::createCommonSection(); +template InputSection<ELF64BE> *elf::createCommonSection(); + +template InputSection<ELF32LE> *elf::createInterpSection(); +template InputSection<ELF32BE> *elf::createInterpSection(); +template InputSection<ELF64LE> *elf::createInterpSection(); +template InputSection<ELF64BE> *elf::createInterpSection(); + +template MergeInputSection<ELF32LE> *elf::createCommentSection(); +template MergeInputSection<ELF32BE> *elf::createCommentSection(); +template MergeInputSection<ELF64LE> *elf::createCommentSection(); +template MergeInputSection<ELF64BE> *elf::createCommentSection(); + +template class elf::MipsAbiFlagsSection<ELF32LE>; +template class elf::MipsAbiFlagsSection<ELF32BE>; +template class elf::MipsAbiFlagsSection<ELF64LE>; +template class elf::MipsAbiFlagsSection<ELF64BE>; + +template class elf::MipsOptionsSection<ELF32LE>; +template class elf::MipsOptionsSection<ELF32BE>; +template class elf::MipsOptionsSection<ELF64LE>; +template class elf::MipsOptionsSection<ELF64BE>; + +template class elf::MipsReginfoSection<ELF32LE>; +template class elf::MipsReginfoSection<ELF32BE>; +template class elf::MipsReginfoSection<ELF64LE>; +template class elf::MipsReginfoSection<ELF64BE>; + 
// Explicit instantiations of the synthetic section class templates, one per
// ELF flavour listed here (32/64-bit, little/big endian).

template class elf::BuildIdSection<ELF32LE>;
template class elf::BuildIdSection<ELF32BE>;
template class elf::BuildIdSection<ELF64LE>;
template class elf::BuildIdSection<ELF64BE>;

template class elf::GotSection<ELF32LE>;
template class elf::GotSection<ELF32BE>;
template class elf::GotSection<ELF64LE>;
template class elf::GotSection<ELF64BE>;

template class elf::MipsGotSection<ELF32LE>;
template class elf::MipsGotSection<ELF32BE>;
template class elf::MipsGotSection<ELF64LE>;
template class elf::MipsGotSection<ELF64BE>;

template class elf::GotPltSection<ELF32LE>;
template class elf::GotPltSection<ELF32BE>;
template class elf::GotPltSection<ELF64LE>;
template class elf::GotPltSection<ELF64BE>;

template class elf::IgotPltSection<ELF32LE>;
template class elf::IgotPltSection<ELF32BE>;
template class elf::IgotPltSection<ELF64LE>;
template class elf::IgotPltSection<ELF64BE>;

template class elf::StringTableSection<ELF32LE>;
template class elf::StringTableSection<ELF32BE>;
template class elf::StringTableSection<ELF64LE>;
template class elf::StringTableSection<ELF64BE>;

template class elf::DynamicSection<ELF32LE>;
template class elf::DynamicSection<ELF32BE>;
template class elf::DynamicSection<ELF64LE>;
template class elf::DynamicSection<ELF64BE>;

template class elf::RelocationSection<ELF32LE>;
template class elf::RelocationSection<ELF32BE>;
template class elf::RelocationSection<ELF64LE>;
template class elf::RelocationSection<ELF64BE>;

template class elf::SymbolTableSection<ELF32LE>;
template class elf::SymbolTableSection<ELF32BE>;
template class elf::SymbolTableSection<ELF64LE>;
template class elf::SymbolTableSection<ELF64BE>;

template class elf::GnuHashTableSection<ELF32LE>;
template class elf::GnuHashTableSection<ELF32BE>;
template class elf::GnuHashTableSection<ELF64LE>;
template class elf::GnuHashTableSection<ELF64BE>;

template class elf::HashTableSection<ELF32LE>;
template class elf::HashTableSection<ELF32BE>;
template class elf::HashTableSection<ELF64LE>;
template class elf::HashTableSection<ELF64BE>;

template class elf::PltSection<ELF32LE>;
template class elf::PltSection<ELF32BE>;
template class elf::PltSection<ELF64LE>;
template class elf::PltSection<ELF64BE>;

template class elf::IpltSection<ELF32LE>;
template class elf::IpltSection<ELF32BE>;
template class elf::IpltSection<ELF64LE>;
template class elf::IpltSection<ELF64BE>;

template class elf::GdbIndexSection<ELF32LE>;
template class elf::GdbIndexSection<ELF32BE>;
template class elf::GdbIndexSection<ELF64LE>;
template class elf::GdbIndexSection<ELF64BE>;

template class elf::EhFrameHeader<ELF32LE>;
template class elf::EhFrameHeader<ELF32BE>;
template class elf::EhFrameHeader<ELF64LE>;
template class elf::EhFrameHeader<ELF64BE>;

template class elf::VersionTableSection<ELF32LE>;
template class elf::VersionTableSection<ELF32BE>;
template class elf::VersionTableSection<ELF64LE>;
template class elf::VersionTableSection<ELF64BE>;

template class elf::VersionNeedSection<ELF32LE>;
template class elf::VersionNeedSection<ELF32BE>;
template class elf::VersionNeedSection<ELF64LE>;
template class elf::VersionNeedSection<ELF64BE>;

template class elf::VersionDefinitionSection<ELF32LE>;
template class elf::VersionDefinitionSection<ELF32BE>;
template class elf::VersionDefinitionSection<ELF64LE>;
template class elf::VersionDefinitionSection<ELF64BE>;

template class elf::MipsRldMapSection<ELF32LE>;
template class elf::MipsRldMapSection<ELF32BE>;
template class elf::MipsRldMapSection<ELF64LE>;
template class elf::MipsRldMapSection<ELF64BE>;

template class elf::ARMExidxSentinelSection<ELF32LE>;
template class elf::ARMExidxSentinelSection<ELF32BE>;
template class elf::ARMExidxSentinelSection<ELF64LE>;
template class elf::ARMExidxSentinelSection<ELF64BE>;
diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.h b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
new file mode 100644
index 000000000000..dfefb3821e75
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
@@ -0,0 +1,747 @@
//===- SyntheticSection.h ---------------------------------------*- C++ -*-===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_SYNTHETIC_SECTION_H
#define LLD_ELF_SYNTHETIC_SECTION_H

#include "GdbIndex.h"
#include "InputSection.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/MC/StringTableBuilder.h"

namespace lld {
namespace elf {

// Base class of all linker-generated sections. A synthetic section is an
// InputSection with no backing data (an empty ArrayRef) whose kind is
// InputSectionData::Synthetic. Subclasses provide their contents through
// writeTo() and report their size through getSize().
template <class ELFT> class SyntheticSection : public InputSection<ELFT> {
  typedef typename ELFT::uint uintX_t;

public:
  SyntheticSection(uintX_t Flags, uint32_t Type, uintX_t Addralign,
                   StringRef Name)
      : InputSection<ELFT>(Flags, Type, Addralign, ArrayRef<uint8_t>(), Name,
                           InputSectionData::Synthetic) {
    // Synthetic sections are marked live on construction.
    this->Live = true;
  }

  virtual ~SyntheticSection() = default;
  // Writes the section contents into Buf.
  virtual void writeTo(uint8_t *Buf) = 0;
  // Size of the section contents in bytes.
  virtual size_t getSize() const = 0;
  // Optional hook; does nothing by default.
  virtual void finalize() {}
  // Whether the section has nothing to emit; false by default.
  virtual bool empty() const { return false; }

  // Address of this section in the output, or 0 if it has not been assigned
  // to an output section.
  uintX_t getVA() const {
    return this->OutSec ? this->OutSec->Addr + this->OutSecOff : 0;
  }

  // Supports LLVM-style isa<>/cast<>/dyn_cast<>.
  static bool classof(const InputSectionData *D) {
    return D->kind() == InputSectionData::Synthetic;
  }
};

// Global offset table section.
template <class ELFT> class GotSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::uint uintX_t;

public:
  GotSection();
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override { return Size; }
  void finalize() override;
  bool empty() const override;

  void addEntry(SymbolBody &Sym);
  bool addDynTlsEntry(SymbolBody &Sym);
  bool addTlsIndex();
  uintX_t getGlobalDynAddr(const SymbolBody &B) const;
  uintX_t getGlobalDynOffset(const SymbolBody &B) const;

  uintX_t getTlsIndexVA() { return this->getVA() + TlsIndexOff; }
  uint32_t getTlsIndexOff() const { return TlsIndexOff; }

  // Flag to force GOT to be in output if we have relocations
  // that rely on its address.
  bool HasGotOffRel = false;

private:
  size_t NumEntries = 0;
  // Initialised to all-ones (-1 converted to uint32_t).
  // NOTE(review): presumably the "no TLS index allocated yet" sentinel --
  // confirm in addTlsIndex().
  uint32_t TlsIndexOff = -1;
  uintX_t Size = 0;
};

// .note.gnu.build-id section.
template <class ELFT> class BuildIdSection : public SyntheticSection<ELFT> {
  // First 16 bytes are a header.
  static const unsigned HeaderSize = 16;

public:
  BuildIdSection();
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override { return HeaderSize + HashSize; }
  void writeBuildId(llvm::ArrayRef<uint8_t> Buf);

private:
  void computeHash(llvm::ArrayRef<uint8_t> Buf,
                   std::function<void(uint8_t *, ArrayRef<uint8_t>)> Hash);

  // NOTE(review): neither member has a default initializer here; they must be
  // set elsewhere (constructor / writeTo) before being read -- confirm.
  size_t HashSize;
  uint8_t *HashBuf;
};

// MIPS-specific global offset table section.
template <class ELFT>
class MipsGotSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::uint uintX_t;

public:
  MipsGotSection();
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override { return Size; }
  void finalize() override;
  bool empty() const override;
  void addEntry(SymbolBody &Sym, uintX_t Addend, RelExpr Expr);
  bool addDynTlsEntry(SymbolBody &Sym);
  bool addTlsIndex();
  uintX_t getPageEntryOffset(const SymbolBody &B, uintX_t Addend) const;
  uintX_t getBodyEntryOffset(const SymbolBody &B, uintX_t Addend) const;
  uintX_t getGlobalDynOffset(const SymbolBody &B) const;

  // Returns the symbol which corresponds to the first entry of the global part
  // of GOT on MIPS platform. It is required to fill up MIPS-specific dynamic
  // table properties.
  // Returns nullptr if the global part is empty.
  const SymbolBody *getFirstGlobalEntry() const;

  // Returns the number of entries in the local part of GOT including
  // the number of reserved entries.
  unsigned getLocalEntriesNum() const;

  // Returns offset of TLS part of the MIPS GOT table. This part goes
  // after 'local' and 'global' entries.
  uintX_t getTlsOffset() const;

  uint32_t getTlsIndexOff() const { return TlsIndexOff; }

  uintX_t getGp() const;

private:
  // MIPS GOT consists of three parts: local, global and tls. Each part
  // contains different types of entries. Here is a layout of GOT:
  // - Header entries                |
  // - Page entries                  |   Local part
  // - Local entries (16-bit access) |
  // - Local entries (32-bit access) |
  // - Normal global entries         ||  Global part
  // - Reloc-only global entries     ||
  // - TLS entries                   ||| TLS part
  //
  // Header:
  //   Two entries hold predefined value 0x0 and 0x80000000.
  // Page entries:
  //   These entries are created by R_MIPS_GOT_PAGE relocations and
  //   R_MIPS_GOT16 relocations against local symbols. They are initialized by
  //   the higher 16 bits of the corresponding symbol's value. So each 64kb of
  //   address space requires a single GOT entry.
  // Local entries (16-bit access):
  //   These entries are created by GOT relocations against global
  //   non-preemptible symbols, so the dynamic linker is not needed to resolve
  //   the symbol's values. "16-bit access" means that corresponding
  //   relocations address GOT using 16-bit index. Each unique Symbol-Addend
  //   pair has its own GOT entry.
  // Local entries (32-bit access):
  //   These entries are the same as above but created by relocations which
  //   address GOT using 32-bit index (R_MIPS_GOT_HI16/LO16 etc).
  // Normal global entries:
  //   These entries are created by GOT relocations against preemptible global
  //   symbols. They need to be initialized by the dynamic linker and they are
  //   ordered exactly as the corresponding entries in the dynamic symbols
  //   table.
  // Reloc-only global entries:
  //   These entries are created for symbols that are referenced by dynamic
  //   relocations R_MIPS_REL32. These entries are not accessed with gp-relative
  //   addressing, but MIPS ABI requires that these entries be present in GOT.
  // TLS entries:
  //   Entries created by TLS relocations.

  // Number of "Header" entries.
  static const unsigned HeaderEntriesNum = 2;
  // Number of allocated "Page" entries.
  uint32_t PageEntriesNum = 0;
  // Map output sections referenced by MIPS GOT relocations
  // to the first index of "Page" entries allocated for this section.
  llvm::SmallMapVector<const OutputSectionBase *, size_t, 16> PageIndexMap;

  typedef std::pair<const SymbolBody *, uintX_t> GotEntry;
  typedef std::vector<GotEntry> GotEntries;
  // Map from Symbol-Addend pair to the GOT index.
  llvm::DenseMap<GotEntry, size_t> EntryIndexMap;
  // Local entries (16-bit access).
  GotEntries LocalEntries;
  // Local entries (32-bit access).
  GotEntries LocalEntries32;

  // Normal and reloc-only global entries.
  GotEntries GlobalEntries;

  // TLS entries.
  std::vector<const SymbolBody *> TlsEntries;

  // Initialised to all-ones (-1 converted to uint32_t).
  // NOTE(review): presumably the "no TLS index allocated yet" sentinel --
  // confirm in addTlsIndex().
  uint32_t TlsIndexOff = -1;
  uintX_t Size = 0;
};

// Section holding the GOT entries that belong to PLT entries. It reports
// itself as empty while no entry has been added.
template <class ELFT>
class GotPltSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::uint uintX_t;

public:
  GotPltSection();
  void addEntry(SymbolBody &Sym);
  size_t getSize() const override;
  void writeTo(uint8_t *Buf) override;
  bool empty() const override { return Entries.empty(); }

private:
  std::vector<const SymbolBody *> Entries;
};

// The IgotPltSection is a Got associated with the IpltSection for GNU Ifunc
// Symbols that will be relocated by Target->IRelativeRel.
// On most Targets the IgotPltSection will immediately follow the GotPltSection
// on ARM the IgotPltSection will immediately follow the GotSection.
// The IgotPltSection is a Got associated with the IpltSection for GNU Ifunc
// symbols that will be relocated by Target->IRelativeRel.
// On most targets the IgotPltSection immediately follows the GotPltSection;
// on ARM the IgotPltSection immediately follows the GotSection.
template <class ELFT>
class IgotPltSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::uint uintX_t;

public:
  IgotPltSection();
  void addEntry(SymbolBody &Sym);
  size_t getSize() const override;
  void writeTo(uint8_t *Buf) override;
  // The section is considered empty until at least one entry is recorded.
  bool empty() const override { return Entries.empty(); }

private:
  // Symbols that own an entry in this section.
  std::vector<const SymbolBody *> Entries;
};

// A string table synthetic section. getSize() reports the running Size
// member, which starts at 1 to account for the leading NUL byte.
template <class ELFT>
class StringTableSection final : public SyntheticSection<ELFT> {
public:
  typedef typename ELFT::uint uintX_t;
  StringTableSection(StringRef Name, bool Dynamic);
  // NOTE(review): presumably returns the offset of S within the table and
  // deduplicates through StringMap when HashIt is true -- confirm in the .cpp.
  unsigned addString(StringRef S, bool HashIt = true);
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override { return Size; }
  // Returns the Dynamic flag supplied at construction.
  bool isDynamic() const { return Dynamic; }

private:
  const bool Dynamic;

  // ELF string tables start with a NUL byte, so 1.
  uintX_t Size = 1;

  llvm::DenseMap<StringRef, unsigned> StringMap;
  std::vector<StringRef> Strings;
};
template <class ELFT>
class DynamicSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Dyn Elf_Dyn;
  typedef typename ELFT::Rel Elf_Rel;
  typedef typename ELFT::Rela Elf_Rela;
  typedef typename ELFT::Shdr Elf_Shdr;
  typedef typename ELFT::Sym Elf_Sym;
  typedef typename ELFT::uint uintX_t;

  // The .dynamic section contains information for the dynamic linker.
  // The section consists of fixed size entries, which consist of
  // type and value fields. Values are one of plain integers, symbol
  // addresses, or section addresses. This struct represents the entry.
  struct Entry {
    int32_t Tag;
    // Which member is active is recorded in Kind below.
    union {
      OutputSectionBase *OutSec;
      InputSection<ELFT> *InSec;
      uint64_t Val;
      const SymbolBody *Sym;
    };
    enum KindT { SecAddr, SecSize, SymAddr, PlainInt, InSecAddr } Kind;
    // Output-section entry; Kind defaults to SecAddr but callers may pass
    // another kind (e.g. SecSize).
    Entry(int32_t Tag, OutputSectionBase *OutSec, KindT Kind = SecAddr)
        : Tag(Tag), OutSec(OutSec), Kind(Kind) {}
    // Input-section entry, always tagged InSecAddr.
    Entry(int32_t Tag, InputSection<ELFT> *Sec)
        : Tag(Tag), InSec(Sec), Kind(InSecAddr) {}
    // Plain integer entry.
    Entry(int32_t Tag, uint64_t Val) : Tag(Tag), Val(Val), Kind(PlainInt) {}
    // Symbol entry, always tagged SymAddr.
    Entry(int32_t Tag, const SymbolBody *Sym)
        : Tag(Tag), Sym(Sym), Kind(SymAddr) {}
  };

  // finalize() fills this vector with the section contents. finalize()
  // cannot directly create final section contents because when the
  // function is called, symbol or section addresses are not fixed yet.
  std::vector<Entry> Entries;

public:
  DynamicSection();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override { return Size; }

private:
  void addEntries();
  // Appends one entry to Entries.
  void add(Entry E) { Entries.push_back(E); }
  uintX_t Size = 0;
};

// A section of dynamic relocations. Its size is the number of recorded
// relocations multiplied by the inherited Entsize.
template <class ELFT>
class RelocationSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Rel Elf_Rel;
  typedef typename ELFT::Rela Elf_Rela;
  typedef typename ELFT::uint uintX_t;

public:
  RelocationSection(StringRef Name, bool Sort);
  void addReloc(const DynamicReloc<ELFT> &Reloc);
  unsigned getRelocOffset();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  bool empty() const override { return Relocs.empty(); }
  size_t getSize() const override { return Relocs.size() * this->Entsize; }
  size_t getRelativeRelocCount() const { return NumRelativeRelocs; }

private:
  // NOTE(review): presumably requests sorting of Relocs when the section is
  // written -- confirm against writeTo() in the .cpp.
  bool Sort;
  size_t NumRelativeRelocs = 0;
  std::vector<DynamicReloc<ELFT>> Relocs;
};

// Pairs a symbol with the string table offset of its name.
struct SymbolTableEntry {
  SymbolBody *Symbol;
  size_t StrTabOffset;
};
// Outputs GNU Hash section. For detailed explanation see:
// https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
template <class ELFT>
class GnuHashTableSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Off Elf_Off;
  typedef typename ELFT::Word Elf_Word;
  typedef typename ELFT::uint uintX_t;

public:
  GnuHashTableSection();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override { return this->Size; }

  // Adds symbols to the hash table.
  // Sorts the input to satisfy GNU hash section requirements.
  void addSymbols(std::vector<SymbolTableEntry> &Symbols);

private:
  static unsigned calcNBuckets(unsigned NumHashed);
  static unsigned calcMaskWords(unsigned NumHashed);

  // The header and bloom filter writers take Buf by reference;
  // NOTE(review): presumably they advance it past what they wrote.
  void writeHeader(uint8_t *&Buf);
  void writeBloomFilter(uint8_t *&Buf);
  void writeHashTable(uint8_t *Buf);

  // Per-symbol record: the symbol, its name offset and its hash value.
  struct SymbolData {
    SymbolBody *Body;
    size_t STName;
    uint32_t Hash;
  };

  std::vector<SymbolData> Symbols;

  // These three have no in-class initializer; they must be assigned before
  // being read.
  unsigned MaskWords;
  unsigned NBuckets;
  unsigned Shift2;
  uintX_t Size = 0;
};

// Hash table synthetic section; getSize() reports the Size member, which
// starts at 0.
template <class ELFT>
class HashTableSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Word Elf_Word;

public:
  HashTableSection();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override { return this->Size; }

private:
  size_t Size = 0;
};

// PLT synthetic section. Each entry pairs a symbol with an unsigned value.
// NOTE(review): the unsigned is presumably the relocation offset handed to
// TargetInfo::writePlt -- confirm in the .cpp.
template <class ELFT> class PltSection final : public SyntheticSection<ELFT> {
public:
  PltSection();
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override;
  void addEntry(SymbolBody &Sym);
  bool empty() const override { return Entries.empty(); }

private:
  std::vector<std::pair<const SymbolBody *, unsigned>> Entries;
};
// The IpltSection is a variant of Plt for recording entries for GNU Ifunc
// symbols that will be subject to a Target->IRelativeRel.
// The IpltSection immediately follows the Plt section in the Output Section.
template <class ELFT> class IpltSection final : public SyntheticSection<ELFT> {
public:
  IpltSection();
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override;
  void addEntry(SymbolBody &Sym);
  bool empty() const override { return Entries.empty(); }

private:
  std::vector<std::pair<const SymbolBody *, unsigned>> Entries;
};

// Synthetic section for the GDB index.
template <class ELFT>
class GdbIndexSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::uint uintX_t;

  // Layout constants. They are non-static const data members, so every
  // instance carries its own copy and the class is not copy-assignable.
  const unsigned OffsetTypeSize = 4;
  const unsigned CuListOffset = 6 * OffsetTypeSize;
  const unsigned CompilationUnitSize = 16;
  const unsigned AddressEntrySize = 16 + OffsetTypeSize;
  const unsigned SymTabEntrySize = 2 * OffsetTypeSize;

public:
  GdbIndexSection();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override;
  bool empty() const override;

  // Pairs of [CU Offset, CU length].
  std::vector<std::pair<uintX_t, uintX_t>> CompilationUnits;

  llvm::StringTableBuilder StringPool;

  GdbHashTab SymbolTable;

  // The CU vector portion of the constant pool.
  std::vector<std::vector<std::pair<uint32_t, uint8_t>>> CuVectors;

  std::vector<AddressEntry<ELFT>> AddressArea;

private:
  void parseDebugSections();
  void readDwarf(InputSection<ELFT> *I);

  // These offsets have no in-class initializer.
  // NOTE(review): presumably computed by finalize() -- confirm in the .cpp.
  uint32_t CuTypesOffset;
  uint32_t SymTabOffset;
  uint32_t ConstantPoolOffset;
  uint32_t StringPoolOffset;

  size_t CuVectorsSize = 0;
  std::vector<size_t> CuVectorsOffset;

  bool Finalized = false;
};
// --eh-frame-hdr option tells linker to construct a header for all the
// .eh_frame sections. This header is placed to a section named .eh_frame_hdr
// and also to a PT_GNU_EH_FRAME segment.
// At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by
// calling dl_iterate_phdr.
// This section contains a lookup table for quick binary search of FDEs.
// Detailed info about internals can be found in Ian Lance Taylor's blog:
// http://www.airs.com/blog/archives/460 (".eh_frame")
// http://www.airs.com/blog/archives/462 (".eh_frame_hdr")
template <class ELFT>
class EhFrameHeader final : public SyntheticSection<ELFT> {
  typedef typename ELFT::uint uintX_t;

public:
  EhFrameHeader();
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override;
  // Both values are taken as 32-bit quantities.
  void addFde(uint32_t Pc, uint32_t FdeVA);
  bool empty() const override;

private:
  // One lookup-table record: a PC value and the address of its FDE.
  struct FdeData {
    uint32_t Pc;
    uint32_t FdeVA;
  };

  std::vector<FdeData> Fdes;
};

// For more information about .gnu.version and .gnu.version_r see:
// https://www.akkadia.org/drepper/symbol-versioning

// The .gnu.version_d section which has a section type of SHT_GNU_verdef shall
// contain symbol version definitions. The number of entries in this section
// shall be contained in the DT_VERDEFNUM entry of the .dynamic section.
// The section shall contain an array of Elf_Verdef structures, optionally
// followed by an array of Elf_Verdaux structures.
template <class ELFT>
class VersionDefinitionSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Verdef Elf_Verdef;
  typedef typename ELFT::Verdaux Elf_Verdaux;

public:
  VersionDefinitionSection();
  void finalize() override;
  size_t getSize() const override;
  void writeTo(uint8_t *Buf) override;

private:
  // Writes a single version definition into Buf.
  void writeOne(uint8_t *Buf, uint32_t Index, StringRef Name, size_t NameOff);

  // No in-class initializer. NOTE(review): presumably the string table
  // offset of the output file's own definition name, set in finalize().
  unsigned FileDefNameOff;
};
// The .gnu.version section specifies the required version of each symbol in the
// dynamic symbol table. It contains one Elf_Versym for each dynamic symbol
// table entry. An Elf_Versym is just a 16-bit integer that refers to a version
// identifier defined in either the .gnu.version_r or .gnu.version_d section.
// The values 0 and 1 are reserved. All other values are used for versions in
// the own object or in any of the dependencies.
template <class ELFT>
class VersionTableSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Versym Elf_Versym;

public:
  VersionTableSection();
  void finalize() override;
  size_t getSize() const override;
  void writeTo(uint8_t *Buf) override;
  bool empty() const override;
};

// The .gnu.version_r section defines the version identifiers used by
// .gnu.version. It contains a linked list of Elf_Verneed data structures. Each
// Elf_Verneed specifies the version requirements for a single DSO, and contains
// a reference to a linked list of Elf_Vernaux data structures which define the
// mapping from version identifiers to version names.
template <class ELFT>
class VersionNeedSection final : public SyntheticSection<ELFT> {
  typedef typename ELFT::Verneed Elf_Verneed;
  typedef typename ELFT::Vernaux Elf_Vernaux;

  // A vector of shared files that need Elf_Verneed data structures and the
  // string table offsets of their sonames.
  std::vector<std::pair<SharedFile<ELFT> *, size_t>> Needed;

  // The next available version identifier.
  // It has no in-class initializer, so the constructor must set it.
  unsigned NextIndex;

public:
  VersionNeedSection();
  void addSymbol(SharedSymbol<ELFT> *SS);
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  size_t getSize() const override;
  // Number of shared files recorded in Needed.
  size_t getNeedNum() const { return Needed.size(); }
  bool empty() const override;
};

// .MIPS.abiflags section.
// Holds a single Elf_Mips_ABIFlags record; the section size is the size of
// that record.
template <class ELFT>
class MipsAbiFlagsSection final : public SyntheticSection<ELFT> {
  typedef llvm::object::Elf_Mips_ABIFlags<ELFT> Elf_Mips_ABIFlags;

public:
  static MipsAbiFlagsSection *create();

  MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags);
  size_t getSize() const override { return sizeof(Elf_Mips_ABIFlags); }
  void writeTo(uint8_t *Buf) override;

private:
  Elf_Mips_ABIFlags Flags;
};
// .MIPS.options section.
// The section size is one Elf_Mips_Options descriptor plus one
// Elf_Mips_RegInfo record.
template <class ELFT>
class MipsOptionsSection final : public SyntheticSection<ELFT> {
  typedef llvm::object::Elf_Mips_Options<ELFT> Elf_Mips_Options;
  typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo;

public:
  static MipsOptionsSection *create();

  MipsOptionsSection(Elf_Mips_RegInfo Reginfo);
  void writeTo(uint8_t *Buf) override;

  size_t getSize() const override {
    return sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo);
  }

private:
  Elf_Mips_RegInfo Reginfo;
};

// MIPS .reginfo section.
// The section size is exactly one Elf_Mips_RegInfo record.
template <class ELFT>
class MipsReginfoSection final : public SyntheticSection<ELFT> {
  typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo;

public:
  static MipsReginfoSection *create();

  MipsReginfoSection(Elf_Mips_RegInfo Reginfo);
  size_t getSize() const override { return sizeof(Elf_Mips_RegInfo); }
  void writeTo(uint8_t *Buf) override;

private:
  Elf_Mips_RegInfo Reginfo;
};

// This is a MIPS specific section to hold a space within the data segment
// of executable file which is pointed to by the DT_MIPS_RLD_MAP entry.
// See "Dynamic section" in Chapter 5 in the following document:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
// The reserved space is one target-sized word (sizeof(ELFT::uint)).
// NOTE(review): unlike the other synthetic sections declared in this header,
// this class is not marked final.
template <class ELFT> class MipsRldMapSection : public SyntheticSection<ELFT> {
public:
  MipsRldMapSection();
  size_t getSize() const override { return sizeof(typename ELFT::uint); }
  void writeTo(uint8_t *Buf) override;
};

// Fixed-size (8 byte) synthetic section.
// NOTE(review): presumably the terminating sentinel entry of the ARM
// .ARM.exidx table -- confirm in the .cpp. This class is not marked final.
template <class ELFT> class ARMExidxSentinelSection : public SyntheticSection<ELFT> {
public:
  ARMExidxSentinelSection();
  size_t getSize() const override { return 8; }
  void writeTo(uint8_t *Buf) override;
};

// Factory functions for linker-generated input sections.
template <class ELFT> InputSection<ELFT> *createCommonSection();
template <class ELFT> InputSection<ELFT> *createInterpSection();
template <class ELFT> MergeInputSection<ELFT> *createCommentSection();
// Linker generated sections which can be used as inputs.
// Every member is a static pointer, so there is one set of sections per ELFT
// instantiation.
template <class ELFT> struct In {
  static InputSection<ELFT> *ARMAttributes;
  static BuildIdSection<ELFT> *BuildId;
  static InputSection<ELFT> *Common;
  static DynamicSection<ELFT> *Dynamic;
  static StringTableSection<ELFT> *DynStrTab;
  static SymbolTableSection<ELFT> *DynSymTab;
  static EhFrameHeader<ELFT> *EhFrameHdr;
  static GnuHashTableSection<ELFT> *GnuHashTab;
  static GdbIndexSection<ELFT> *GdbIndex;
  static GotSection<ELFT> *Got;
  static MipsGotSection<ELFT> *MipsGot;
  static GotPltSection<ELFT> *GotPlt;
  static IgotPltSection<ELFT> *IgotPlt;
  static HashTableSection<ELFT> *HashTab;
  static InputSection<ELFT> *Interp;
  static MipsRldMapSection<ELFT> *MipsRldMap;
  static PltSection<ELFT> *Plt;
  static IpltSection<ELFT> *Iplt;
  static RelocationSection<ELFT> *RelaDyn;
  static RelocationSection<ELFT> *RelaPlt;
  static RelocationSection<ELFT> *RelaIplt;
  static StringTableSection<ELFT> *ShStrTab;
  static StringTableSection<ELFT> *StrTab;
  static SymbolTableSection<ELFT> *SymTab;
  static VersionDefinitionSection<ELFT> *VerDef;
  static VersionTableSection<ELFT> *VerSym;
  static VersionNeedSection<ELFT> *VerNeed;
};
*In<ELFT>::IgotPlt; +template <class ELFT> HashTableSection<ELFT> *In<ELFT>::HashTab; +template <class ELFT> InputSection<ELFT> *In<ELFT>::Interp; +template <class ELFT> MipsRldMapSection<ELFT> *In<ELFT>::MipsRldMap; +template <class ELFT> PltSection<ELFT> *In<ELFT>::Plt; +template <class ELFT> IpltSection<ELFT> *In<ELFT>::Iplt; +template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaDyn; +template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaPlt; +template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaIplt; +template <class ELFT> StringTableSection<ELFT> *In<ELFT>::ShStrTab; +template <class ELFT> StringTableSection<ELFT> *In<ELFT>::StrTab; +template <class ELFT> SymbolTableSection<ELFT> *In<ELFT>::SymTab; +template <class ELFT> VersionDefinitionSection<ELFT> *In<ELFT>::VerDef; +template <class ELFT> VersionTableSection<ELFT> *In<ELFT>::VerSym; +template <class ELFT> VersionNeedSection<ELFT> *In<ELFT>::VerNeed; +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Target.cpp b/contrib/llvm/tools/lld/ELF/Target.cpp new file mode 100644 index 000000000000..cb2b178fa849 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Target.cpp @@ -0,0 +1,2351 @@ +//===- Target.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Machine-specific things, such as applying relocations, creation of +// GOT or PLT entries, etc., are handled in this file. +// +// Refer the ELF spec for the single letter variables, S, A or P, used +// in this file. +// +// Some functions defined in this file has "relaxTls" as part of their names. +// They do peephole optimization for TLS variables by rewriting instructions. 
+// They are not part of the ABI but optional optimization, so you can skip +// them if you are not interested in how TLS variables are optimized. +// See the following paper for the details. +// +// Ulrich Drepper, ELF Handling For Thread-Local Storage +// http://www.akkadia.org/drepper/tls.pdf +// +//===----------------------------------------------------------------------===// + +#include "Target.h" +#include "Error.h" +#include "InputFiles.h" +#include "Memory.h" +#include "OutputSections.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Thunks.h" +#include "Writer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; + +std::string lld::toString(uint32_t Type) { + return getELFRelocationTypeName(elf::Config->EMachine, Type); +} + +namespace lld { +namespace elf { + +TargetInfo *Target; + +static void or32le(uint8_t *P, int32_t V) { write32le(P, read32le(P) | V); } +static void or32be(uint8_t *P, int32_t V) { write32be(P, read32be(P) | V); } + +template <class ELFT> static std::string getErrorLoc(uint8_t *Loc) { + for (InputSectionData *D : Symtab<ELFT>::X->Sections) { + auto *IS = dyn_cast_or_null<InputSection<ELFT>>(D); + if (!IS || !IS->OutSec) + continue; + + uint8_t *ISLoc = cast<OutputSection<ELFT>>(IS->OutSec)->Loc + IS->OutSecOff; + if (ISLoc <= Loc && Loc < ISLoc + IS->getSize()) + return IS->getLocation(Loc - ISLoc) + ": "; + } + return ""; +} + +static std::string getErrorLocation(uint8_t *Loc) { + switch (Config->EKind) { + case ELF32LEKind: + return getErrorLoc<ELF32LE>(Loc); + case ELF32BEKind: + return getErrorLoc<ELF32BE>(Loc); + case ELF64LEKind: + return getErrorLoc<ELF64LE>(Loc); + case ELF64BEKind: + return getErrorLoc<ELF64BE>(Loc); + default: + llvm_unreachable("unknown ELF type"); + } +} + +template 
<unsigned N> +static void checkInt(uint8_t *Loc, int64_t V, uint32_t Type) { + if (!isInt<N>(V)) + error(getErrorLocation(Loc) + "relocation " + toString(Type) + + " out of range"); +} + +template <unsigned N> +static void checkUInt(uint8_t *Loc, uint64_t V, uint32_t Type) { + if (!isUInt<N>(V)) + error(getErrorLocation(Loc) + "relocation " + toString(Type) + + " out of range"); +} + +template <unsigned N> +static void checkIntUInt(uint8_t *Loc, uint64_t V, uint32_t Type) { + if (!isInt<N>(V) && !isUInt<N>(V)) + error(getErrorLocation(Loc) + "relocation " + toString(Type) + + " out of range"); +} + +template <unsigned N> +static void checkAlignment(uint8_t *Loc, uint64_t V, uint32_t Type) { + if ((V & (N - 1)) != 0) + error(getErrorLocation(Loc) + "improper alignment for relocation " + + toString(Type)); +} + +namespace { +class X86TargetInfo final : public TargetInfo { +public: + X86TargetInfo(); + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; + void writeGotPltHeader(uint8_t *Buf) const override; + uint32_t getDynRel(uint32_t Type) const override; + bool isTlsLocalDynamicRel(uint32_t Type) const override; + bool isTlsGlobalDynamicRel(uint32_t Type) const override; + bool isTlsInitialExecRel(uint32_t Type) const override; + void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writePltHeader(uint8_t *Buf) const override; + void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + + RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const override; + void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; + void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const 
// x86-64 target. It is a template; createTarget() instantiates it with
// ELF32LE when Config->EKind is ELF32LEKind and with ELF64LE otherwise.
template <class ELFT> class X86_64TargetInfo final : public TargetInfo {
public:
  X86_64TargetInfo();
  RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override;
  bool isPicRel(uint32_t Type) const override;
  bool isTlsLocalDynamicRel(uint32_t Type) const override;
  bool isTlsGlobalDynamicRel(uint32_t Type) const override;
  bool isTlsInitialExecRel(uint32_t Type) const override;
  void writeGotPltHeader(uint8_t *Buf) const override;
  void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override;
  void writePltHeader(uint8_t *Buf) const override;
  void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr,
                int32_t Index, unsigned RelOff) const override;
  void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;

  // Relaxation hooks: GOT relaxation plus all four TLS model relaxations.
  RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data,
                          RelExpr Expr) const override;
  void relaxGot(uint8_t *Loc, uint64_t Val) const override;
  void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
  void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
  void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
  void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;

private:
  void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
                     uint8_t ModRm) const;
};

// 32-bit PowerPC target: only relocation classification and application are
// overridden.
class PPCTargetInfo final : public TargetInfo {
public:
  PPCTargetInfo();
  void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
  RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override;
};

// 64-bit PowerPC target.
class PPC64TargetInfo final : public TargetInfo {
public:
  PPC64TargetInfo();
  RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override;
  void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr,
                int32_t Index, unsigned RelOff) const override;
  void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
};

// AArch64 target. It overrides the GD->LE, GD->IE and IE->LE TLS relaxations
// but not LD->LE.
class AArch64TargetInfo final : public TargetInfo {
public:
  AArch64TargetInfo();
  RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override;
  bool isPicRel(uint32_t Type) const override;
  bool isTlsInitialExecRel(uint32_t Type) const override;
  void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override;
  void writePltHeader(uint8_t *Buf) const override;
  void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr,
                int32_t Index, unsigned RelOff) const override;
  bool usesOnlyLowPageBits(uint32_t Type) const override;
  void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
  RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data,
                          RelExpr Expr) const override;
  void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
  void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
  void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
};

// AMDGPU target: only relocation classification and application are
// overridden.
class AMDGPUTargetInfo final : public TargetInfo {
public:
  AMDGPUTargetInfo();
  void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
  RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override;
};

// 32-bit ARM target. It overrides getImplicitAddend() and getThunkExpr() in
// addition to the usual GOT/PLT writers.
class ARMTargetInfo final : public TargetInfo {
public:
  ARMTargetInfo();
  RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override;
  bool isPicRel(uint32_t Type) const override;
  uint32_t getDynRel(uint32_t Type) const override;
  uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override;
  bool isTlsLocalDynamicRel(uint32_t Type) const override;
  bool isTlsGlobalDynamicRel(uint32_t Type) const override;
  bool isTlsInitialExecRel(uint32_t Type) const override;
  void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override;
  void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const override;
  void writePltHeader(uint8_t *Buf) const override;
  void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr,
                int32_t Index, unsigned RelOff) const override;
  RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File,
                       const SymbolBody &S) const override;
  void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
};

// MIPS target. It is a template; createTarget() instantiates it for all four
// ELF class/endianness combinations.
template <class ELFT> class MipsTargetInfo final : public TargetInfo {
public:
  MipsTargetInfo();
  RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override;
  uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override;
  bool isPicRel(uint32_t Type) const override;
  uint32_t getDynRel(uint32_t Type) const override;
  bool isTlsLocalDynamicRel(uint32_t Type) const override;
  bool isTlsGlobalDynamicRel(uint32_t Type) const override;
  void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override;
  void writePltHeader(uint8_t *Buf) const override;
  void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr,
                int32_t Index, unsigned RelOff) const override;
  RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File,
                       const SymbolBody &S) const override;
  void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override;
  bool usesOnlyLowPageBits(uint32_t Type) const override;
};
// Default implementations of the TargetInfo hooks. Targets override only the
// ones they need.

TargetInfo::~TargetInfo() {}

// By default a relocation carries no implicit addend.
uint64_t TargetInfo::getImplicitAddend(const uint8_t *Buf,
                                       uint32_t Type) const {
  return 0;
}

bool TargetInfo::usesOnlyLowPageBits(uint32_t Type) const { return false; }

// By default no thunk is needed: the expression is returned unchanged.
RelExpr TargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType,
                                 const InputFile &File,
                                 const SymbolBody &S) const {
  return Expr;
}

// By default no relocation type is classified as a TLS relocation.
bool TargetInfo::isTlsInitialExecRel(uint32_t Type) const { return false; }

bool TargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { return false; }

bool TargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { return false; }

// By default an .igot.plt entry is written exactly like a .got.plt entry.
void TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const {
  writeGotPlt(Buf, S);
}

// By default the relaxation expression is returned unchanged.
RelExpr TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data,
                                    RelExpr Expr) const {
  return Expr;
}

// The relax* defaults below must never be reached: a target that reports a
// relocation as relaxable has to override the matching hook.
void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const {
  llvm_unreachable("Should not have claimed to be relaxable");
}

void TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type,
                                uint64_t Val) const {
  llvm_unreachable("Should not have claimed to be relaxable");
}

void TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type,
                                uint64_t Val) const {
  llvm_unreachable("Should not have claimed to be relaxable");
}

void TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type,
                                uint64_t Val) const {
  llvm_unreachable("Should not have claimed to be relaxable");
}

void TargetInfo::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type,
                                uint64_t Val) const {
  llvm_unreachable("Should not have claimed to be relaxable");
}
+ IRelativeRel = R_386_IRELATIVE; + RelativeRel = R_386_RELATIVE; + TlsGotRel = R_386_TLS_TPOFF; + TlsModuleIndexRel = R_386_TLS_DTPMOD32; + TlsOffsetRel = R_386_TLS_DTPOFF32; + GotEntrySize = 4; + GotPltEntrySize = 4; + PltEntrySize = 16; + PltHeaderSize = 16; + TlsGdRelaxSkip = 2; +} + +RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_386_TLS_GD: + return R_TLSGD; + case R_386_TLS_LDM: + return R_TLSLD; + case R_386_PLT32: + return R_PLT_PC; + case R_386_PC16: + case R_386_PC32: + return R_PC; + case R_386_GOTPC: + return R_GOTONLY_PC_FROM_END; + case R_386_TLS_IE: + return R_GOT; + case R_386_GOT32: + case R_386_GOT32X: + case R_386_TLS_GOTIE: + return R_GOT_FROM_END; + case R_386_GOTOFF: + return R_GOTREL_FROM_END; + case R_386_TLS_LE: + return R_TLS; + case R_386_TLS_LE_32: + return R_NEG_TLS; + } +} + +RelExpr X86TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const { + switch (Expr) { + default: + return Expr; + case R_RELAX_TLS_GD_TO_IE: + return R_RELAX_TLS_GD_TO_IE_END; + case R_RELAX_TLS_GD_TO_LE: + return R_RELAX_TLS_GD_TO_LE_NEG; + } +} + +void X86TargetInfo::writeGotPltHeader(uint8_t *Buf) const { + write32le(Buf, In<ELF32LE>::Dynamic->getVA()); +} + +void X86TargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const { + // Entries in .got.plt initially points back to the corresponding + // PLT entries with a fixed offset to skip the first instruction. + write32le(Buf, S.getPltVA<ELF32LE>() + 6); +} + +void X86TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { + // An x86 entry is the address of the ifunc resolver function. 
+ write32le(Buf, S.getVA<ELF32LE>()); +} + +uint32_t X86TargetInfo::getDynRel(uint32_t Type) const { + if (Type == R_386_TLS_LE) + return R_386_TLS_TPOFF; + if (Type == R_386_TLS_LE_32) + return R_386_TLS_TPOFF32; + return Type; +} + +bool X86TargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { + return Type == R_386_TLS_GD; +} + +bool X86TargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { + return Type == R_386_TLS_LDO_32 || Type == R_386_TLS_LDM; +} + +bool X86TargetInfo::isTlsInitialExecRel(uint32_t Type) const { + return Type == R_386_TLS_IE || Type == R_386_TLS_GOTIE; +} + +void X86TargetInfo::writePltHeader(uint8_t *Buf) const { + // Executable files and shared object files have + // separate procedure linkage tables. + if (Config->Pic) { + const uint8_t V[] = { + 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx) + 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx) + 0x90, 0x90, 0x90, 0x90 // nop; nop; nop; nop + }; + memcpy(Buf, V, sizeof(V)); + return; + } + + const uint8_t PltData[] = { + 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushl (GOT+4) + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *(GOT+8) + 0x90, 0x90, 0x90, 0x90 // nop; nop; nop; nop + }; + memcpy(Buf, PltData, sizeof(PltData)); + uint32_t Got = In<ELF32LE>::GotPlt->getVA(); + write32le(Buf + 2, Got + 4); + write32le(Buf + 8, Got + 8); +} + +void X86TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const uint8_t Inst[] = { + 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, // jmp *foo_in_GOT|*foo@GOT(%ebx) + 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $reloc_offset + 0xe9, 0x00, 0x00, 0x00, 0x00 // jmp .PLT0@PC + }; + memcpy(Buf, Inst, sizeof(Inst)); + + // jmp *foo@GOT(%ebx) or jmp *foo_in_GOT + Buf[1] = Config->Pic ? 0xa3 : 0x25; + uint32_t Got = In<ELF32LE>::GotPlt->getVA(); + write32le(Buf + 2, Config->Shared ? 
GotEntryAddr - Got : GotEntryAddr); + write32le(Buf + 7, RelOff); + write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16); +} + +uint64_t X86TargetInfo::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { + switch (Type) { + default: + return 0; + case R_386_16: + case R_386_PC16: + return read16le(Buf); + case R_386_32: + case R_386_GOT32: + case R_386_GOT32X: + case R_386_GOTOFF: + case R_386_GOTPC: + case R_386_PC32: + case R_386_PLT32: + case R_386_TLS_LE: + return read32le(Buf); + } +} + +void X86TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + checkInt<32>(Loc, Val, Type); + + // R_386_PC16 and R_386_16 are not part of the current i386 psABI. They are + // used by 16-bit x86 objects, like boot loaders. + if (Type == R_386_16 || Type == R_386_PC16) { + write16le(Loc, Val); + return; + } + write32le(Loc, Val); +} + +void X86TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // leal x@tlsgd(, %ebx, 1), + // call __tls_get_addr@plt + // to + // movl %gs:0,%eax + // subl $x@ntpoff,%eax + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax + 0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 // subl 0(%ebx), %eax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); + relocateOne(Loc + 5, R_386_32, Val); +} + +void X86TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // leal x@tlsgd(, %ebx, 1), + // call __tls_get_addr@plt + // to + // movl %gs:0, %eax + // addl x@gotntpoff(%ebx), %eax + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax + 0x03, 0x83, 0x00, 0x00, 0x00, 0x00 // addl 0(%ebx), %eax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); + relocateOne(Loc + 5, R_386_32, Val); +} + +// In some conditions, relocations can be optimized to avoid using GOT. +// This function does that for Initial Exec to Local Exec case. 
+void X86TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Ulrich's document section 6.2 says that @gotntpoff can + // be used with MOVL or ADDL instructions. + // @indntpoff is similar to @gotntpoff, but for use in + // position dependent code. + uint8_t Reg = (Loc[-1] >> 3) & 7; + + if (Type == R_386_TLS_IE) { + if (Loc[-1] == 0xa1) { + // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" + // This case is different from the generic case below because + // this is a 5 byte instruction while below is 6 bytes. + Loc[-1] = 0xb8; + } else if (Loc[-2] == 0x8b) { + // "movl foo@indntpoff,%reg" -> "movl $foo,%reg" + Loc[-2] = 0xc7; + Loc[-1] = 0xc0 | Reg; + } else { + // "addl foo@indntpoff,%reg" -> "addl $foo,%reg" + Loc[-2] = 0x81; + Loc[-1] = 0xc0 | Reg; + } + } else { + assert(Type == R_386_TLS_GOTIE); + if (Loc[-2] == 0x8b) { + // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" + Loc[-2] = 0xc7; + Loc[-1] = 0xc0 | Reg; + } else { + // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg" + Loc[-2] = 0x8d; + Loc[-1] = 0x80 | (Reg << 3) | Reg; + } + } + relocateOne(Loc, R_386_TLS_LE, Val); +} + +void X86TargetInfo::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + if (Type == R_386_TLS_LDO_32) { + relocateOne(Loc, R_386_TLS_LE, Val); + return; + } + + // Convert + // leal foo(%reg),%eax + // call ___tls_get_addr + // to + // movl %gs:0,%eax + // nop + // leal 0(%esi,1),%esi + const uint8_t Inst[] = { + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax + 0x90, // nop + 0x8d, 0x74, 0x26, 0x00 // leal 0(%esi,1),%esi + }; + memcpy(Loc - 2, Inst, sizeof(Inst)); +} + +template <class ELFT> X86_64TargetInfo<ELFT>::X86_64TargetInfo() { + CopyRel = R_X86_64_COPY; + GotRel = R_X86_64_GLOB_DAT; + PltRel = R_X86_64_JUMP_SLOT; + RelativeRel = R_X86_64_RELATIVE; + IRelativeRel = R_X86_64_IRELATIVE; + TlsGotRel = R_X86_64_TPOFF64; + TlsModuleIndexRel = R_X86_64_DTPMOD64; + TlsOffsetRel = R_X86_64_DTPOFF64; + GotEntrySize 
= 8; + GotPltEntrySize = 8; + PltEntrySize = 16; + PltHeaderSize = 16; + TlsGdRelaxSkip = 2; + // Align to the large page size (known as a superpage or huge page). + // FreeBSD automatically promotes large, superpage-aligned allocations. + DefaultImageBase = 0x200000; +} + +template <class ELFT> +RelExpr X86_64TargetInfo<ELFT>::getRelExpr(uint32_t Type, + const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_X86_64_TPOFF32: + return R_TLS; + case R_X86_64_TLSLD: + return R_TLSLD_PC; + case R_X86_64_TLSGD: + return R_TLSGD_PC; + case R_X86_64_SIZE32: + case R_X86_64_SIZE64: + return R_SIZE; + case R_X86_64_PLT32: + return R_PLT_PC; + case R_X86_64_PC32: + case R_X86_64_PC64: + return R_PC; + case R_X86_64_GOT32: + case R_X86_64_GOT64: + return R_GOT_FROM_END; + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCRELX: + case R_X86_64_REX_GOTPCRELX: + case R_X86_64_GOTTPOFF: + return R_GOT_PC; + case R_X86_64_NONE: + return R_HINT; + } +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::writeGotPltHeader(uint8_t *Buf) const { + // The first entry holds the value of _DYNAMIC. It is not clear why that is + // required, but it is documented in the psabi and the glibc dynamic linker + // seems to use it (note that this is relevant for linking ld.so, not any + // other program). + write64le(Buf, In<ELFT>::Dynamic->getVA()); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::writeGotPlt(uint8_t *Buf, + const SymbolBody &S) const { + // See comments in X86TargetInfo::writeGotPlt. 
+ write32le(Buf, S.getPltVA<ELFT>() + 6); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const { + const uint8_t PltData[] = { + 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOT+8(%rip) + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOT+16(%rip) + 0x0f, 0x1f, 0x40, 0x00 // nopl 0x0(rax) + }; + memcpy(Buf, PltData, sizeof(PltData)); + uint64_t Got = In<ELFT>::GotPlt->getVA(); + uint64_t Plt = In<ELFT>::Plt->getVA(); + write32le(Buf + 2, Got - Plt + 2); // GOT+8 + write32le(Buf + 8, Got - Plt + 4); // GOT+16 +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const uint8_t Inst[] = { + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmpq *got(%rip) + 0x68, 0x00, 0x00, 0x00, 0x00, // pushq <relocation index> + 0xe9, 0x00, 0x00, 0x00, 0x00 // jmpq plt[0] + }; + memcpy(Buf, Inst, sizeof(Inst)); + + write32le(Buf + 2, GotEntryAddr - PltEntryAddr - 6); + write32le(Buf + 7, Index); + write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16); +} + +template <class ELFT> +bool X86_64TargetInfo<ELFT>::isPicRel(uint32_t Type) const { + return Type != R_X86_64_PC32 && Type != R_X86_64_32; +} + +template <class ELFT> +bool X86_64TargetInfo<ELFT>::isTlsInitialExecRel(uint32_t Type) const { + return Type == R_X86_64_GOTTPOFF; +} + +template <class ELFT> +bool X86_64TargetInfo<ELFT>::isTlsGlobalDynamicRel(uint32_t Type) const { + return Type == R_X86_64_TLSGD; +} + +template <class ELFT> +bool X86_64TargetInfo<ELFT>::isTlsLocalDynamicRel(uint32_t Type) const { + return Type == R_X86_64_DTPOFF32 || Type == R_X86_64_DTPOFF64 || + Type == R_X86_64_TLSLD; +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // .byte 0x66 + // leaq x@tlsgd(%rip), %rdi + // .word 0x6666 + // rex64 + // call __tls_get_addr@plt + // to + // mov %fs:0x0,%rax + // lea 
x@tpoff,%rax + const uint8_t Inst[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax + 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff,%rax + }; + memcpy(Loc - 4, Inst, sizeof(Inst)); + // The original code used a pc relative relocation and so we have to + // compensate for the -4 in had in the addend. + relocateOne(Loc + 8, R_X86_64_TPOFF32, Val + 4); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // .byte 0x66 + // leaq x@tlsgd(%rip), %rdi + // .word 0x6666 + // rex64 + // call __tls_get_addr@plt + // to + // mov %fs:0x0,%rax + // addq x@tpoff,%rax + const uint8_t Inst[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax + 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // addq x@tpoff,%rax + }; + memcpy(Loc - 4, Inst, sizeof(Inst)); + // Both code sequences are PC relatives, but since we are moving the constant + // forward by 8 bytes we have to subtract the value by 8. + relocateOne(Loc + 8, R_X86_64_PC32, Val - 8); +} + +// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to +// R_X86_64_TPOFF32 so that it does not use GOT. +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + uint8_t *Inst = Loc - 3; + uint8_t Reg = Loc[-1] >> 3; + uint8_t *RegSlot = Loc - 1; + + // Note that ADD with RSP or R12 is converted to ADD instead of LEA + // because LEA with these registers needs 4 bytes to encode and thus + // wouldn't fit the space. 
+ + if (memcmp(Inst, "\x48\x03\x25", 3) == 0) { + // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp" + memcpy(Inst, "\x48\x81\xc4", 3); + } else if (memcmp(Inst, "\x4c\x03\x25", 3) == 0) { + // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12" + memcpy(Inst, "\x49\x81\xc4", 3); + } else if (memcmp(Inst, "\x4c\x03", 2) == 0) { + // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]" + memcpy(Inst, "\x4d\x8d", 2); + *RegSlot = 0x80 | (Reg << 3) | Reg; + } else if (memcmp(Inst, "\x48\x03", 2) == 0) { + // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg" + memcpy(Inst, "\x48\x8d", 2); + *RegSlot = 0x80 | (Reg << 3) | Reg; + } else if (memcmp(Inst, "\x4c\x8b", 2) == 0) { + // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]" + memcpy(Inst, "\x49\xc7", 2); + *RegSlot = 0xc0 | Reg; + } else if (memcmp(Inst, "\x48\x8b", 2) == 0) { + // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg" + memcpy(Inst, "\x48\xc7", 2); + *RegSlot = 0xc0 | Reg; + } else { + error(getErrorLocation(Loc - 3) + + "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only"); + } + + // The original code used a PC relative relocation. + // Need to compensate for the -4 it had in the addend. 
+ relocateOne(Loc, R_X86_64_TPOFF32, Val + 4); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // Convert + // leaq bar@tlsld(%rip), %rdi + // callq __tls_get_addr@PLT + // leaq bar@dtpoff(%rax), %rcx + // to + // .word 0x6666 + // .byte 0x66 + // mov %fs:0,%rax + // leaq bar@tpoff(%rax), %rcx + if (Type == R_X86_64_DTPOFF64) { + write64le(Loc, Val); + return; + } + if (Type == R_X86_64_DTPOFF32) { + relocateOne(Loc, R_X86_64_TPOFF32, Val); + return; + } + + const uint8_t Inst[] = { + 0x66, 0x66, // .word 0x6666 + 0x66, // .byte 0x66 + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax + }; + memcpy(Loc - 3, Inst, sizeof(Inst)); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + switch (Type) { + case R_X86_64_32: + checkUInt<32>(Loc, Val, Type); + write32le(Loc, Val); + break; + case R_X86_64_32S: + case R_X86_64_TPOFF32: + case R_X86_64_GOT32: + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCRELX: + case R_X86_64_REX_GOTPCRELX: + case R_X86_64_PC32: + case R_X86_64_GOTTPOFF: + case R_X86_64_PLT32: + case R_X86_64_TLSGD: + case R_X86_64_TLSLD: + case R_X86_64_DTPOFF32: + case R_X86_64_SIZE32: + checkInt<32>(Loc, Val, Type); + write32le(Loc, Val); + break; + case R_X86_64_64: + case R_X86_64_DTPOFF64: + case R_X86_64_GLOB_DAT: + case R_X86_64_PC64: + case R_X86_64_SIZE64: + case R_X86_64_GOT64: + write64le(Loc, Val); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +template <class ELFT> +RelExpr X86_64TargetInfo<ELFT>::adjustRelaxExpr(uint32_t Type, + const uint8_t *Data, + RelExpr RelExpr) const { + if (Type != R_X86_64_GOTPCRELX && Type != R_X86_64_REX_GOTPCRELX) + return RelExpr; + const uint8_t Op = Data[-2]; + const uint8_t ModRm = Data[-1]; + // FIXME: When PIC is disabled and foo is defined locally in the + // lower 32 bit address space, 
memory operand in mov can be converted into + // immediate operand. Otherwise, mov must be changed to lea. We support only + // latter relaxation at this moment. + if (Op == 0x8b) + return R_RELAX_GOT_PC; + // Relax call and jmp. + if (Op == 0xff && (ModRm == 0x15 || ModRm == 0x25)) + return R_RELAX_GOT_PC; + + // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor. + // If PIC then no relaxation is available. + // We also don't relax test/binop instructions without REX byte, + // they are 32bit operations and not common to have. + assert(Type == R_X86_64_REX_GOTPCRELX); + return Config->Pic ? RelExpr : R_RELAX_GOT_PC_NOPIC; +} + +// A subset of relaxations can only be applied for no-PIC. This method +// handles such relaxations. Instructions encoding information was taken from: +// "Intel 64 and IA-32 Architectures Software Developer's Manual V2" +// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/ +// 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf) +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxGotNoPic(uint8_t *Loc, uint64_t Val, + uint8_t Op, uint8_t ModRm) const { + const uint8_t Rex = Loc[-3]; + // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg". + if (Op == 0x85) { + // See "TEST-Logical Compare" (4-428 Vol. 2B), + // TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension). + + // ModR/M byte has form XX YYY ZZZ, where + // YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1). + // XX has different meanings: + // 00: The operand's memory address is in reg1. + // 01: The operand's memory address is reg1 + a byte-sized displacement. + // 10: The operand's memory address is reg1 + a word-sized displacement. + // 11: The operand is reg1 itself. + // If an instruction requires only one operand, the unused reg2 field + // holds extra opcode bits rather than a register code + // 0xC0 == 11 000 000 binary. + // 0x38 == 00 111 000 binary. 
+ // We transfer reg2 to reg1 here as operand. + // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3). + Loc[-1] = 0xc0 | (ModRm & 0x38) >> 3; // ModR/M byte. + + // Change opcode from TEST r/m64, r64 to TEST r/m64, imm32 + // See "TEST-Logical Compare" (4-428 Vol. 2B). + Loc[-2] = 0xf7; + + // Move R bit to the B bit in REX byte. + // REX byte is encoded as 0100WRXB, where + // 0100 is 4bit fixed pattern. + // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the + // default operand size is used (which is 32-bit for most but not all + // instructions). + // REX.R This 1-bit value is an extension to the MODRM.reg field. + // REX.X This 1-bit value is an extension to the SIB.index field. + // REX.B This 1-bit value is an extension to the MODRM.rm field or the + // SIB.base field. + // See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A). + Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2; + relocateOne(Loc, R_X86_64_PC32, Val); + return; + } + + // If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub + // or xor operations. + + // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg". + // Logic is close to one for test instruction above, but we also + // write opcode extension here, see below for details. + Loc[-1] = 0xc0 | (ModRm & 0x38) >> 3 | (Op & 0x3c); // ModR/M byte. + + // Primary opcode is 0x81, opcode extension is one of: + // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB, + // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP. + // This value was wrote to MODRM.reg in a line above. + // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15), + // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for + // descriptions about each operation. 
+ Loc[-2] = 0x81; + Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2; + relocateOne(Loc, R_X86_64_PC32, Val); +} + +template <class ELFT> +void X86_64TargetInfo<ELFT>::relaxGot(uint8_t *Loc, uint64_t Val) const { + const uint8_t Op = Loc[-2]; + const uint8_t ModRm = Loc[-1]; + + // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg". + if (Op == 0x8b) { + Loc[-2] = 0x8d; + relocateOne(Loc, R_X86_64_PC32, Val); + return; + } + + if (Op != 0xff) { + // We are relaxing a rip relative to an absolute, so compensate + // for the old -4 addend. + assert(!Config->Pic); + relaxGotNoPic(Loc, Val + 4, Op, ModRm); + return; + } + + // Convert call/jmp instructions. + if (ModRm == 0x15) { + // ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call foo". + // Instead we convert to "addr32 call foo" where addr32 is an instruction + // prefix. That makes result expression to be a single instruction. + Loc[-2] = 0x67; // addr32 prefix + Loc[-1] = 0xe8; // call + relocateOne(Loc, R_X86_64_PC32, Val); + return; + } + + // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop". + // jmp doesn't return, so it is fine to use nop here, it is just a stub. + assert(ModRm == 0x25); + Loc[-2] = 0xe9; // jmp + Loc[3] = 0x90; // nop + relocateOne(Loc - 1, R_X86_64_PC32, Val + 1); +} + +// Relocation masks following the #lo(value), #hi(value), #ha(value), +// #higher(value), #highera(value), #highest(value), and #highesta(value) +// macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi +// document. 
// Returns the 16-bit slice of V that starts at bit Shift.
static uint16_t ppcHalfword(uint64_t V, unsigned Shift) {
  return static_cast<uint16_t>(V >> Shift);
}

// 16-bit slices of a 64-bit value as selected by the #lo, #hi, #ha, #higher,
// #highera, #highest and #highesta operators. The variants whose name ends in
// "a" add 0x8000 to the value before the slice is taken.
static uint16_t applyPPCLo(uint64_t V) { return ppcHalfword(V, 0); }
static uint16_t applyPPCHi(uint64_t V) { return ppcHalfword(V, 16); }
static uint16_t applyPPCHa(uint64_t V) { return ppcHalfword(V + 0x8000, 16); }
static uint16_t applyPPCHigher(uint64_t V) { return ppcHalfword(V, 32); }
static uint16_t applyPPCHighera(uint64_t V) {
  return ppcHalfword(V + 0x8000, 32);
}
static uint16_t applyPPCHighest(uint64_t V) { return ppcHalfword(V, 48); }
static uint16_t applyPPCHighesta(uint64_t V) {
  return ppcHalfword(V + 0x8000, 48);
}
+ DefaultImageBase = 0x10000000; +} + +static uint64_t PPC64TocOffset = 0x8000; + +uint64_t getPPC64TocBase() { + // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The + // TOC starts where the first of these sections starts. We always create a + // .got when we see a relocation that uses it, so for us the start is always + // the .got. + uint64_t TocVA = In<ELF64BE>::Got->getVA(); + + // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000 + // thus permitting a full 64 Kbytes segment. Note that the glibc startup + // code (crt1.o) assumes that you can get from the TOC base to the + // start of the .toc section with only a single (signed) 16-bit relocation. + return TocVA + PPC64TocOffset; +} + +RelExpr PPC64TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_PPC64_TOC16: + case R_PPC64_TOC16_DS: + case R_PPC64_TOC16_HA: + case R_PPC64_TOC16_HI: + case R_PPC64_TOC16_LO: + case R_PPC64_TOC16_LO_DS: + return R_GOTREL; + case R_PPC64_TOC: + return R_PPC_TOC; + case R_PPC64_REL24: + return R_PPC_PLT_OPD; + } +} + +void PPC64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + uint64_t Off = GotEntryAddr - getPPC64TocBase(); + + // FIXME: What we should do, in theory, is get the offset of the function + // descriptor in the .opd section, and use that as the offset from %r2 (the + // TOC-base pointer). Instead, we have the GOT-entry offset, and that will + // be a pointer to the function descriptor in the .opd section. Using + // this scheme is simpler, but requires an extra indirection per PLT dispatch. 
+ + write32be(Buf, 0xf8410028); // std %r2, 40(%r1) + write32be(Buf + 4, 0x3d620000 | applyPPCHa(Off)); // addis %r11, %r2, X@ha + write32be(Buf + 8, 0xe98b0000 | applyPPCLo(Off)); // ld %r12, X@l(%r11) + write32be(Buf + 12, 0xe96c0000); // ld %r11,0(%r12) + write32be(Buf + 16, 0x7d6903a6); // mtctr %r11 + write32be(Buf + 20, 0xe84c0008); // ld %r2,8(%r12) + write32be(Buf + 24, 0xe96c0010); // ld %r11,16(%r12) + write32be(Buf + 28, 0x4e800420); // bctr +} + +static std::pair<uint32_t, uint64_t> toAddr16Rel(uint32_t Type, uint64_t Val) { + uint64_t V = Val - PPC64TocOffset; + switch (Type) { + case R_PPC64_TOC16: + return {R_PPC64_ADDR16, V}; + case R_PPC64_TOC16_DS: + return {R_PPC64_ADDR16_DS, V}; + case R_PPC64_TOC16_HA: + return {R_PPC64_ADDR16_HA, V}; + case R_PPC64_TOC16_HI: + return {R_PPC64_ADDR16_HI, V}; + case R_PPC64_TOC16_LO: + return {R_PPC64_ADDR16_LO, V}; + case R_PPC64_TOC16_LO_DS: + return {R_PPC64_ADDR16_LO_DS, V}; + default: + return {Type, Val}; + } +} + +void PPC64TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // For a TOC-relative relocation, proceed in terms of the corresponding + // ADDR16 relocation type. 
+ std::tie(Type, Val) = toAddr16Rel(Type, Val); + + switch (Type) { + case R_PPC64_ADDR14: { + checkAlignment<4>(Loc, Val, Type); + // Preserve the AA/LK bits in the branch instruction + uint8_t AALK = Loc[3]; + write16be(Loc + 2, (AALK & 3) | (Val & 0xfffc)); + break; + } + case R_PPC64_ADDR16: + checkInt<16>(Loc, Val, Type); + write16be(Loc, Val); + break; + case R_PPC64_ADDR16_DS: + checkInt<16>(Loc, Val, Type); + write16be(Loc, (read16be(Loc) & 3) | (Val & ~3)); + break; + case R_PPC64_ADDR16_HA: + case R_PPC64_REL16_HA: + write16be(Loc, applyPPCHa(Val)); + break; + case R_PPC64_ADDR16_HI: + case R_PPC64_REL16_HI: + write16be(Loc, applyPPCHi(Val)); + break; + case R_PPC64_ADDR16_HIGHER: + write16be(Loc, applyPPCHigher(Val)); + break; + case R_PPC64_ADDR16_HIGHERA: + write16be(Loc, applyPPCHighera(Val)); + break; + case R_PPC64_ADDR16_HIGHEST: + write16be(Loc, applyPPCHighest(Val)); + break; + case R_PPC64_ADDR16_HIGHESTA: + write16be(Loc, applyPPCHighesta(Val)); + break; + case R_PPC64_ADDR16_LO: + write16be(Loc, applyPPCLo(Val)); + break; + case R_PPC64_ADDR16_LO_DS: + case R_PPC64_REL16_LO: + write16be(Loc, (read16be(Loc) & 3) | (applyPPCLo(Val) & ~3)); + break; + case R_PPC64_ADDR32: + case R_PPC64_REL32: + checkInt<32>(Loc, Val, Type); + write32be(Loc, Val); + break; + case R_PPC64_ADDR64: + case R_PPC64_REL64: + case R_PPC64_TOC: + write64be(Loc, Val); + break; + case R_PPC64_REL24: { + uint32_t Mask = 0x03FFFFFC; + checkInt<24>(Loc, Val, Type); + write32be(Loc, (read32be(Loc) & ~Mask) | (Val & Mask)); + break; + } + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +AArch64TargetInfo::AArch64TargetInfo() { + CopyRel = R_AARCH64_COPY; + RelativeRel = R_AARCH64_RELATIVE; + IRelativeRel = R_AARCH64_IRELATIVE; + GotRel = R_AARCH64_GLOB_DAT; + PltRel = R_AARCH64_JUMP_SLOT; + TlsDescRel = R_AARCH64_TLSDESC; + TlsGotRel = R_AARCH64_TLS_TPREL64; + GotEntrySize = 8; + GotPltEntrySize = 8; + PltEntrySize = 16; + PltHeaderSize = 
32; + DefaultMaxPageSize = 65536; + + // It doesn't seem to be documented anywhere, but tls on aarch64 uses variant + // 1 of the tls structures and the tcb size is 16. + TcbSize = 16; +} + +RelExpr AArch64TargetInfo::getRelExpr(uint32_t Type, + const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_AARCH64_TLSDESC_ADR_PAGE21: + return R_TLSDESC_PAGE; + case R_AARCH64_TLSDESC_LD64_LO12_NC: + case R_AARCH64_TLSDESC_ADD_LO12_NC: + return R_TLSDESC; + case R_AARCH64_TLSDESC_CALL: + return R_TLSDESC_CALL; + case R_AARCH64_TLSLE_ADD_TPREL_HI12: + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + return R_TLS; + case R_AARCH64_CALL26: + case R_AARCH64_CONDBR19: + case R_AARCH64_JUMP26: + case R_AARCH64_TSTBR14: + return R_PLT_PC; + case R_AARCH64_PREL16: + case R_AARCH64_PREL32: + case R_AARCH64_PREL64: + case R_AARCH64_ADR_PREL_LO21: + return R_PC; + case R_AARCH64_ADR_PREL_PG_HI21: + return R_PAGE_PC; + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + return R_GOT; + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + return R_GOT_PAGE_PC; + } +} + +RelExpr AArch64TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const { + if (Expr == R_RELAX_TLS_GD_TO_IE) { + if (Type == R_AARCH64_TLSDESC_ADR_PAGE21) + return R_RELAX_TLS_GD_TO_IE_PAGE_PC; + return R_RELAX_TLS_GD_TO_IE_ABS; + } + return Expr; +} + +bool AArch64TargetInfo::usesOnlyLowPageBits(uint32_t Type) const { + switch (Type) { + default: + return false; + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_TLSDESC_ADD_LO12_NC: + case R_AARCH64_TLSDESC_LD64_LO12_NC: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + return true; + } +} + +bool AArch64TargetInfo::isTlsInitialExecRel(uint32_t Type) const 
// Returns Page(Expr): Expr with its low 12 bits cleared, i.e. the address of
// the 4 KiB page that contains Expr. The 4 KiB granule is fixed by this
// definition and does not depend on the page size the platform really uses.
uint64_t getAArch64Page(uint64_t Expr) {
  const unsigned PageShift = 12;
  return (Expr >> PageShift) << PageShift;
}
// Extracts the bit field [Start, End] (both bounds inclusive) of Val and
// returns it shifted down so that bit Start becomes bit 0.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
static uint64_t getBits(uint64_t Val, int Start, int End) {
  const int Width = End + 1 - Start;
  const uint64_t FieldMask = (static_cast<uint64_t>(1) << Width) - 1;
  const uint64_t Shifted = Val >> Start;
  return Shifted & FieldMask;
}
R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + case R_AARCH64_TLSDESC_LD64_LO12_NC: + checkAlignment<8>(Loc, Val, Type); + or32le(Loc, (Val & 0xFF8) << 7); + break; + case R_AARCH64_LDST8_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 0, 11)); + break; + case R_AARCH64_LDST16_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 1, 11)); + break; + case R_AARCH64_LDST32_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 2, 11)); + break; + case R_AARCH64_LDST64_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 3, 11)); + break; + case R_AARCH64_LDST128_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 4, 11)); + break; + case R_AARCH64_MOVW_UABS_G0_NC: + or32le(Loc, (Val & 0xFFFF) << 5); + break; + case R_AARCH64_MOVW_UABS_G1_NC: + or32le(Loc, (Val & 0xFFFF0000) >> 11); + break; + case R_AARCH64_MOVW_UABS_G2_NC: + or32le(Loc, (Val & 0xFFFF00000000) >> 27); + break; + case R_AARCH64_MOVW_UABS_G3: + or32le(Loc, (Val & 0xFFFF000000000000) >> 43); + break; + case R_AARCH64_TSTBR14: + checkInt<16>(Loc, Val, Type); + or32le(Loc, (Val & 0xFFFC) << 3); + break; + case R_AARCH64_TLSLE_ADD_TPREL_HI12: + checkInt<24>(Loc, Val, Type); + or32AArch64Imm(Loc, Val >> 12); + break; + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + case R_AARCH64_TLSDESC_ADD_LO12_NC: + or32AArch64Imm(Loc, Val); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +void AArch64TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // TLSDESC Global-Dynamic relocation are in the form: + // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] + // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12_NC] + // add x0, x0, :tlsdesc_los:v [_AARCH64_TLSDESC_ADD_LO12_NC] + // .tlsdesccall [R_AARCH64_TLSDESC_CALL] + // blr x1 + // And it can optimized to: + // movz x0, #0x0, lsl #16 + // movk x0, #0x10 + // nop + // nop + checkUInt<32>(Loc, Val, Type); + + switch (Type) { + case R_AARCH64_TLSDESC_ADD_LO12_NC: + case 
R_AARCH64_TLSDESC_CALL: + write32le(Loc, 0xd503201f); // nop + return; + case R_AARCH64_TLSDESC_ADR_PAGE21: + write32le(Loc, 0xd2a00000 | (((Val >> 16) & 0xffff) << 5)); // movz + return; + case R_AARCH64_TLSDESC_LD64_LO12_NC: + write32le(Loc, 0xf2800000 | ((Val & 0xffff) << 5)); // movk + return; + default: + llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + } +} + +void AArch64TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + // TLSDESC Global-Dynamic relocation are in the form: + // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] + // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12_NC] + // add x0, x0, :tlsdesc_los:v [_AARCH64_TLSDESC_ADD_LO12_NC] + // .tlsdesccall [R_AARCH64_TLSDESC_CALL] + // blr x1 + // And it can optimized to: + // adrp x0, :gottprel:v + // ldr x0, [x0, :gottprel_lo12:v] + // nop + // nop + + switch (Type) { + case R_AARCH64_TLSDESC_ADD_LO12_NC: + case R_AARCH64_TLSDESC_CALL: + write32le(Loc, 0xd503201f); // nop + break; + case R_AARCH64_TLSDESC_ADR_PAGE21: + write32le(Loc, 0x90000000); // adrp + relocateOne(Loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, Val); + break; + case R_AARCH64_TLSDESC_LD64_LO12_NC: + write32le(Loc, 0xf9400000); // ldr + relocateOne(Loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, Val); + break; + default: + llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + } +} + +void AArch64TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + checkUInt<32>(Loc, Val, Type); + + if (Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { + // Generate MOVZ. + uint32_t RegNo = read32le(Loc) & 0x1f; + write32le(Loc, (0xd2a00000 | RegNo) | (((Val >> 16) & 0xffff) << 5)); + return; + } + if (Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { + // Generate MOVK. 
+ uint32_t RegNo = read32le(Loc) & 0x1f; + write32le(Loc, (0xf2800000 | RegNo) | ((Val & 0xffff) << 5)); + return; + } + llvm_unreachable("invalid relocation for TLS IE to LE relaxation"); +} + +AMDGPUTargetInfo::AMDGPUTargetInfo() { + RelativeRel = R_AMDGPU_REL64; + GotRel = R_AMDGPU_ABS64; + GotEntrySize = 8; +} + +void AMDGPUTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + switch (Type) { + case R_AMDGPU_ABS32: + case R_AMDGPU_GOTPCREL: + case R_AMDGPU_GOTPCREL32_LO: + case R_AMDGPU_REL32: + case R_AMDGPU_REL32_LO: + write32le(Loc, Val); + break; + case R_AMDGPU_ABS64: + write64le(Loc, Val); + break; + case R_AMDGPU_GOTPCREL32_HI: + case R_AMDGPU_REL32_HI: + write32le(Loc, Val >> 32); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +RelExpr AMDGPUTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { + switch (Type) { + case R_AMDGPU_ABS32: + case R_AMDGPU_ABS64: + return R_ABS; + case R_AMDGPU_REL32: + case R_AMDGPU_REL32_LO: + case R_AMDGPU_REL32_HI: + return R_PC; + case R_AMDGPU_GOTPCREL: + case R_AMDGPU_GOTPCREL32_LO: + case R_AMDGPU_GOTPCREL32_HI: + return R_GOT_PC; + default: + fatal("do not know how to handle relocation " + Twine(Type)); + } +} + +ARMTargetInfo::ARMTargetInfo() { + CopyRel = R_ARM_COPY; + RelativeRel = R_ARM_RELATIVE; + IRelativeRel = R_ARM_IRELATIVE; + GotRel = R_ARM_GLOB_DAT; + PltRel = R_ARM_JUMP_SLOT; + TlsGotRel = R_ARM_TLS_TPOFF32; + TlsModuleIndexRel = R_ARM_TLS_DTPMOD32; + TlsOffsetRel = R_ARM_TLS_DTPOFF32; + GotEntrySize = 4; + GotPltEntrySize = 4; + PltEntrySize = 16; + PltHeaderSize = 20; + // ARM uses Variant 1 TLS + TcbSize = 8; + NeedsThunks = true; +} + +RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { + switch (Type) { + default: + return R_ABS; + case R_ARM_THM_JUMP11: + return R_PC; + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_PREL31: + case 
R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + case R_ARM_THM_CALL: + return R_PLT_PC; + case R_ARM_GOTOFF32: + // (S + A) - GOT_ORG + return R_GOTREL; + case R_ARM_GOT_BREL: + // GOT(S) + A - GOT_ORG + return R_GOT_OFF; + case R_ARM_GOT_PREL: + case R_ARM_TLS_IE32: + // GOT(S) + A - P + return R_GOT_PC; + case R_ARM_TARGET1: + return Config->Target1Rel ? R_PC : R_ABS; + case R_ARM_TARGET2: + if (Config->Target2 == Target2Policy::Rel) + return R_PC; + if (Config->Target2 == Target2Policy::Abs) + return R_ABS; + return R_GOT_PC; + case R_ARM_TLS_GD32: + return R_TLSGD_PC; + case R_ARM_TLS_LDM32: + return R_TLSLD_PC; + case R_ARM_BASE_PREL: + // B(S) + A - P + // FIXME: currently B(S) assumed to be .got, this may not hold for all + // platforms. + return R_GOTONLY_PC; + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVT_PREL: + case R_ARM_REL32: + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVT_PREL: + return R_PC; + case R_ARM_NONE: + return R_HINT; + case R_ARM_TLS_LE32: + return R_TLS; + } +} + +bool ARMTargetInfo::isPicRel(uint32_t Type) const { + return (Type == R_ARM_TARGET1 && !Config->Target1Rel) || + (Type == R_ARM_ABS32); +} + +uint32_t ARMTargetInfo::getDynRel(uint32_t Type) const { + if (Type == R_ARM_TARGET1 && !Config->Target1Rel) + return R_ARM_ABS32; + if (Type == R_ARM_ABS32) + return Type; + // Keep it going with a dummy value so that we can find more reloc errors. + return R_ARM_ABS32; +} + +void ARMTargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { + write32le(Buf, In<ELF32LE>::Plt->getVA()); +} + +void ARMTargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { + // An ARM entry is the address of the ifunc resolver function. + write32le(Buf, S.getVA<ELF32LE>()); +} + +void ARMTargetInfo::writePltHeader(uint8_t *Buf) const { + const uint8_t PltData[] = { + 0x04, 0xe0, 0x2d, 0xe5, // str lr, [sp,#-4]! 
+ 0x04, 0xe0, 0x9f, 0xe5, // ldr lr, L2 + 0x0e, 0xe0, 0x8f, 0xe0, // L1: add lr, pc, lr + 0x08, 0xf0, 0xbe, 0xe5, // ldr pc, [lr, #8] + 0x00, 0x00, 0x00, 0x00, // L2: .word &(.got.plt) - L1 - 8 + }; + memcpy(Buf, PltData, sizeof(PltData)); + uint64_t GotPlt = In<ELF32LE>::GotPlt->getVA(); + uint64_t L1 = In<ELF32LE>::Plt->getVA() + 8; + write32le(Buf + 16, GotPlt - L1 - 8); +} + +void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + // FIXME: Using simple code sequence with simple relocations. + // There is a more optimal sequence but it requires support for the group + // relocations. See ELF for the ARM Architecture Appendix A.3 + const uint8_t PltData[] = { + 0x04, 0xc0, 0x9f, 0xe5, // ldr ip, L2 + 0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc + 0x00, 0xf0, 0x9c, 0xe5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00, // L2: .word Offset(&(.plt.got) - L1 - 8 + }; + memcpy(Buf, PltData, sizeof(PltData)); + uint64_t L1 = PltEntryAddr + 4; + write32le(Buf + 12, GotEntryAddr - L1 - 8); +} + +RelExpr ARMTargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, + const InputFile &File, + const SymbolBody &S) const { + // If S is an undefined weak symbol in an executable we don't need a Thunk. + // In a DSO calls to undefined symbols, including weak ones get PLT entries + // which may need a thunk. + if (S.isUndefined() && !S.isLocal() && S.symbol()->isWeak() + && !Config->Shared) + return Expr; + // A state change from ARM to Thumb and vice versa must go through an + // interworking thunk if the relocation type is not R_ARM_CALL or + // R_ARM_THM_CALL. + switch (RelocType) { + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_JUMP24: + // Source is ARM, all PLT entries are ARM so no interworking required. + // Otherwise we need to interwork if Symbol has bit 0 set (Thumb). 
+ if (Expr == R_PC && ((S.getVA<ELF32LE>() & 1) == 1)) + return R_THUNK_PC; + break; + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + // Source is Thumb, all PLT entries are ARM so interworking is required. + // Otherwise we need to interwork if Symbol has bit 0 clear (ARM). + if (Expr == R_PLT_PC) + return R_THUNK_PLT_PC; + if ((S.getVA<ELF32LE>() & 1) == 0) + return R_THUNK_PC; + break; + } + return Expr; +} + +void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + switch (Type) { + case R_ARM_ABS32: + case R_ARM_BASE_PREL: + case R_ARM_GLOB_DAT: + case R_ARM_GOTOFF32: + case R_ARM_GOT_BREL: + case R_ARM_GOT_PREL: + case R_ARM_REL32: + case R_ARM_RELATIVE: + case R_ARM_TARGET1: + case R_ARM_TARGET2: + case R_ARM_TLS_GD32: + case R_ARM_TLS_IE32: + case R_ARM_TLS_LDM32: + case R_ARM_TLS_LDO32: + case R_ARM_TLS_LE32: + case R_ARM_TLS_TPOFF32: + write32le(Loc, Val); + break; + case R_ARM_TLS_DTPMOD32: + write32le(Loc, 1); + break; + case R_ARM_PREL31: + checkInt<31>(Loc, Val, Type); + write32le(Loc, (read32le(Loc) & 0x80000000) | (Val & ~0x80000000)); + break; + case R_ARM_CALL: + // R_ARM_CALL is used for BL and BLX instructions, depending on the + // value of bit 0 of Val, we must select a BL or BLX instruction + if (Val & 1) { + // If bit 0 of Val is 1 the target is Thumb, we must select a BLX. + // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' + checkInt<26>(Loc, Val, Type); + write32le(Loc, 0xfa000000 | // opcode + ((Val & 2) << 23) | // H + ((Val >> 2) & 0x00ffffff)); // imm24 + break; + } + if ((read32le(Loc) & 0xfe000000) == 0xfa000000) + // BLX (always unconditional) instruction to an ARM Target, select an + // unconditional BL. 
+ write32le(Loc, 0xeb000000 | (read32le(Loc) & 0x00ffffff)); + // fall through as BL encoding is shared with B + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + checkInt<26>(Loc, Val, Type); + write32le(Loc, (read32le(Loc) & ~0x00ffffff) | ((Val >> 2) & 0x00ffffff)); + break; + case R_ARM_THM_JUMP11: + checkInt<12>(Loc, Val, Type); + write16le(Loc, (read32le(Loc) & 0xf800) | ((Val >> 1) & 0x07ff)); + break; + case R_ARM_THM_JUMP19: + // Encoding T3: Val = S:J2:J1:imm6:imm11:0 + checkInt<21>(Loc, Val, Type); + write16le(Loc, + (read16le(Loc) & 0xfbc0) | // opcode cond + ((Val >> 10) & 0x0400) | // S + ((Val >> 12) & 0x003f)); // imm6 + write16le(Loc + 2, + 0x8000 | // opcode + ((Val >> 8) & 0x0800) | // J2 + ((Val >> 5) & 0x2000) | // J1 + ((Val >> 1) & 0x07ff)); // imm11 + break; + case R_ARM_THM_CALL: + // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the + // value of bit 0 of Val, we must select a BL or BLX instruction + if ((Val & 1) == 0) { + // Ensure BLX destination is 4-byte aligned. As BLX instruction may + // only be two byte aligned. 
This must be done before overflow check + Val = alignTo(Val, 4); + } + // Bit 12 is 0 for BLX, 1 for BL + write16le(Loc + 2, (read16le(Loc + 2) & ~0x1000) | (Val & 1) << 12); + // Fall through as rest of encoding is the same as B.W + case R_ARM_THM_JUMP24: + // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 + // FIXME: Use of I1 and I2 require v6T2ops + checkInt<25>(Loc, Val, Type); + write16le(Loc, + 0xf000 | // opcode + ((Val >> 14) & 0x0400) | // S + ((Val >> 12) & 0x03ff)); // imm10 + write16le(Loc + 2, + (read16le(Loc + 2) & 0xd000) | // opcode + (((~(Val >> 10)) ^ (Val >> 11)) & 0x2000) | // J1 + (((~(Val >> 11)) ^ (Val >> 13)) & 0x0800) | // J2 + ((Val >> 1) & 0x07ff)); // imm11 + break; + case R_ARM_MOVW_ABS_NC: + case R_ARM_MOVW_PREL_NC: + write32le(Loc, (read32le(Loc) & ~0x000f0fff) | ((Val & 0xf000) << 4) | + (Val & 0x0fff)); + break; + case R_ARM_MOVT_ABS: + case R_ARM_MOVT_PREL: + checkInt<32>(Loc, Val, Type); + write32le(Loc, (read32le(Loc) & ~0x000f0fff) | + (((Val >> 16) & 0xf000) << 4) | ((Val >> 16) & 0xfff)); + break; + case R_ARM_THM_MOVT_ABS: + case R_ARM_THM_MOVT_PREL: + // Encoding T1: A = imm4:i:imm3:imm8 + checkInt<32>(Loc, Val, Type); + write16le(Loc, + 0xf2c0 | // opcode + ((Val >> 17) & 0x0400) | // i + ((Val >> 28) & 0x000f)); // imm4 + write16le(Loc + 2, + (read16le(Loc + 2) & 0x8f00) | // opcode + ((Val >> 12) & 0x7000) | // imm3 + ((Val >> 16) & 0x00ff)); // imm8 + break; + case R_ARM_THM_MOVW_ABS_NC: + case R_ARM_THM_MOVW_PREL_NC: + // Encoding T3: A = imm4:i:imm3:imm8 + write16le(Loc, + 0xf240 | // opcode + ((Val >> 1) & 0x0400) | // i + ((Val >> 12) & 0x000f)); // imm4 + write16le(Loc + 2, + (read16le(Loc + 2) & 0x8f00) | // opcode + ((Val << 4) & 0x7000) | // imm3 + (Val & 0x00ff)); // imm8 + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +uint64_t ARMTargetInfo::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { + switch (Type) { + default: + return 0; + 
case R_ARM_ABS32: + case R_ARM_BASE_PREL: + case R_ARM_GOTOFF32: + case R_ARM_GOT_BREL: + case R_ARM_GOT_PREL: + case R_ARM_REL32: + case R_ARM_TARGET1: + case R_ARM_TARGET2: + case R_ARM_TLS_GD32: + case R_ARM_TLS_LDM32: + case R_ARM_TLS_LDO32: + case R_ARM_TLS_IE32: + case R_ARM_TLS_LE32: + return SignExtend64<32>(read32le(Buf)); + case R_ARM_PREL31: + return SignExtend64<31>(read32le(Buf)); + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + return SignExtend64<26>(read32le(Buf) << 2); + case R_ARM_THM_JUMP11: + return SignExtend64<12>(read16le(Buf) << 1); + case R_ARM_THM_JUMP19: { + // Encoding T3: A = S:J2:J1:imm10:imm6:0 + uint16_t Hi = read16le(Buf); + uint16_t Lo = read16le(Buf + 2); + return SignExtend64<20>(((Hi & 0x0400) << 10) | // S + ((Lo & 0x0800) << 8) | // J2 + ((Lo & 0x2000) << 5) | // J1 + ((Hi & 0x003f) << 12) | // imm6 + ((Lo & 0x07ff) << 1)); // imm11:0 + } + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: { + // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0 + // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S) + // FIXME: I1 and I2 require v6T2ops + uint16_t Hi = read16le(Buf); + uint16_t Lo = read16le(Buf + 2); + return SignExtend64<24>(((Hi & 0x0400) << 14) | // S + (~((Lo ^ (Hi << 3)) << 10) & 0x00800000) | // I1 + (~((Lo ^ (Hi << 1)) << 11) & 0x00400000) | // I2 + ((Hi & 0x003ff) << 12) | // imm0 + ((Lo & 0x007ff) << 1)); // imm11:0 + } + // ELF for the ARM Architecture 4.6.1.1 the implicit addend for MOVW and + // MOVT is in the range -32768 <= A < 32768 + case R_ARM_MOVW_ABS_NC: + case R_ARM_MOVT_ABS: + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVT_PREL: { + uint64_t Val = read32le(Buf) & 0x000f0fff; + return SignExtend64<16>(((Val & 0x000f0000) >> 4) | (Val & 0x00fff)); + } + case R_ARM_THM_MOVW_ABS_NC: + case R_ARM_THM_MOVT_ABS: + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVT_PREL: { + // Encoding T3: A = imm4:i:imm3:imm8 + uint16_t Hi = read16le(Buf); + uint16_t Lo = read16le(Buf + 2); + return 
SignExtend64<16>(((Hi & 0x000f) << 12) | // imm4 + ((Hi & 0x0400) << 1) | // i + ((Lo & 0x7000) >> 4) | // imm3 + (Lo & 0x00ff)); // imm8 + } + } +} + +bool ARMTargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { + return Type == R_ARM_TLS_LDO32 || Type == R_ARM_TLS_LDM32; +} + +bool ARMTargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { + return Type == R_ARM_TLS_GD32; +} + +bool ARMTargetInfo::isTlsInitialExecRel(uint32_t Type) const { + return Type == R_ARM_TLS_IE32; +} + +template <class ELFT> MipsTargetInfo<ELFT>::MipsTargetInfo() { + GotPltHeaderEntriesNum = 2; + DefaultMaxPageSize = 65536; + GotEntrySize = sizeof(typename ELFT::uint); + GotPltEntrySize = sizeof(typename ELFT::uint); + PltEntrySize = 16; + PltHeaderSize = 32; + CopyRel = R_MIPS_COPY; + PltRel = R_MIPS_JUMP_SLOT; + NeedsThunks = true; + if (ELFT::Is64Bits) { + RelativeRel = (R_MIPS_64 << 8) | R_MIPS_REL32; + TlsGotRel = R_MIPS_TLS_TPREL64; + TlsModuleIndexRel = R_MIPS_TLS_DTPMOD64; + TlsOffsetRel = R_MIPS_TLS_DTPREL64; + } else { + RelativeRel = R_MIPS_REL32; + TlsGotRel = R_MIPS_TLS_TPREL32; + TlsModuleIndexRel = R_MIPS_TLS_DTPMOD32; + TlsOffsetRel = R_MIPS_TLS_DTPREL32; + } +} + +template <class ELFT> +RelExpr MipsTargetInfo<ELFT>::getRelExpr(uint32_t Type, + const SymbolBody &S) const { + // See comment in the calculateMipsRelChain. + if (ELFT::Is64Bits || Config->MipsN32Abi) + Type &= 0xff; + switch (Type) { + default: + return R_ABS; + case R_MIPS_JALR: + return R_HINT; + case R_MIPS_GPREL16: + case R_MIPS_GPREL32: + return R_MIPS_GOTREL; + case R_MIPS_26: + return R_PLT; + case R_MIPS_HI16: + case R_MIPS_LO16: + case R_MIPS_GOT_OFST: + // R_MIPS_HI16/R_MIPS_LO16 relocations against _gp_disp calculate + // offset between start of function and 'gp' value which by default + // equal to the start of .got section. In that case we consider these + // relocations as relative. 
+ if (&S == ElfSym<ELFT>::MipsGpDisp) + return R_PC; + return R_ABS; + case R_MIPS_PC32: + case R_MIPS_PC16: + case R_MIPS_PC19_S2: + case R_MIPS_PC21_S2: + case R_MIPS_PC26_S2: + case R_MIPS_PCHI16: + case R_MIPS_PCLO16: + return R_PC; + case R_MIPS_GOT16: + if (S.isLocal()) + return R_MIPS_GOT_LOCAL_PAGE; + // fallthrough + case R_MIPS_CALL16: + case R_MIPS_GOT_DISP: + case R_MIPS_TLS_GOTTPREL: + return R_MIPS_GOT_OFF; + case R_MIPS_CALL_HI16: + case R_MIPS_CALL_LO16: + case R_MIPS_GOT_HI16: + case R_MIPS_GOT_LO16: + return R_MIPS_GOT_OFF32; + case R_MIPS_GOT_PAGE: + return R_MIPS_GOT_LOCAL_PAGE; + case R_MIPS_TLS_GD: + return R_MIPS_TLSGD; + case R_MIPS_TLS_LDM: + return R_MIPS_TLSLD; + } +} + +template <class ELFT> bool MipsTargetInfo<ELFT>::isPicRel(uint32_t Type) const { + return Type == R_MIPS_32 || Type == R_MIPS_64; +} + +template <class ELFT> +uint32_t MipsTargetInfo<ELFT>::getDynRel(uint32_t Type) const { + return RelativeRel; +} + +template <class ELFT> +bool MipsTargetInfo<ELFT>::isTlsLocalDynamicRel(uint32_t Type) const { + return Type == R_MIPS_TLS_LDM; +} + +template <class ELFT> +bool MipsTargetInfo<ELFT>::isTlsGlobalDynamicRel(uint32_t Type) const { + return Type == R_MIPS_TLS_GD; +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { + write32<ELFT::TargetEndianness>(Buf, In<ELFT>::Plt->getVA()); +} + +template <endianness E, uint8_t BSIZE, uint8_t SHIFT> +static int64_t getPcRelocAddend(const uint8_t *Loc) { + uint32_t Instr = read32<E>(Loc); + uint32_t Mask = 0xffffffff >> (32 - BSIZE); + return SignExtend64<BSIZE + SHIFT>((Instr & Mask) << SHIFT); +} + +template <endianness E, uint8_t BSIZE, uint8_t SHIFT> +static void applyMipsPcReloc(uint8_t *Loc, uint32_t Type, uint64_t V) { + uint32_t Mask = 0xffffffff >> (32 - BSIZE); + uint32_t Instr = read32<E>(Loc); + if (SHIFT > 0) + checkAlignment<(1 << SHIFT)>(Loc, V, Type); + checkInt<BSIZE + SHIFT>(Loc, V, Type); + write32<E>(Loc, (Instr & 
~Mask) | ((V >> SHIFT) & Mask)); +} + +template <endianness E> static void writeMipsHi16(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + uint16_t Res = ((V + 0x8000) >> 16) & 0xffff; + write32<E>(Loc, (Instr & 0xffff0000) | Res); +} + +template <endianness E> static void writeMipsHigher(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + uint16_t Res = ((V + 0x80008000) >> 32) & 0xffff; + write32<E>(Loc, (Instr & 0xffff0000) | Res); +} + +template <endianness E> static void writeMipsHighest(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + uint16_t Res = ((V + 0x800080008000) >> 48) & 0xffff; + write32<E>(Loc, (Instr & 0xffff0000) | Res); +} + +template <endianness E> static void writeMipsLo16(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + write32<E>(Loc, (Instr & 0xffff0000) | (V & 0xffff)); +} + +template <class ELFT> static bool isMipsR6() { + const auto &FirstObj = cast<ELFFileBase<ELFT>>(*Config->FirstElf); + uint32_t Arch = FirstObj.getObj().getHeader()->e_flags & EF_MIPS_ARCH; + return Arch == EF_MIPS_ARCH_32R6 || Arch == EF_MIPS_ARCH_64R6; +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const { + const endianness E = ELFT::TargetEndianness; + if (Config->MipsN32Abi) { + write32<E>(Buf, 0x3c0e0000); // lui $14, %hi(&GOTPLT[0]) + write32<E>(Buf + 4, 0x8dd90000); // lw $25, %lo(&GOTPLT[0])($14) + write32<E>(Buf + 8, 0x25ce0000); // addiu $14, $14, %lo(&GOTPLT[0]) + write32<E>(Buf + 12, 0x030ec023); // subu $24, $24, $14 + } else { + write32<E>(Buf, 0x3c1c0000); // lui $28, %hi(&GOTPLT[0]) + write32<E>(Buf + 4, 0x8f990000); // lw $25, %lo(&GOTPLT[0])($28) + write32<E>(Buf + 8, 0x279c0000); // addiu $28, $28, %lo(&GOTPLT[0]) + write32<E>(Buf + 12, 0x031cc023); // subu $24, $24, $28 + } + write32<E>(Buf + 16, 0x03e07825); // move $15, $31 + write32<E>(Buf + 20, 0x0018c082); // srl $24, $24, 2 + write32<E>(Buf + 24, 0x0320f809); // jalr $25 + write32<E>(Buf + 28, 
0x2718fffe); // subu $24, $24, 2 + uint64_t Got = In<ELFT>::GotPlt->getVA(); + writeMipsHi16<E>(Buf, Got); + writeMipsLo16<E>(Buf + 4, Got); + writeMipsLo16<E>(Buf + 8, Got); +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const endianness E = ELFT::TargetEndianness; + write32<E>(Buf, 0x3c0f0000); // lui $15, %hi(.got.plt entry) + write32<E>(Buf + 4, 0x8df90000); // l[wd] $25, %lo(.got.plt entry)($15) + // jr $25 + write32<E>(Buf + 8, isMipsR6<ELFT>() ? 0x03200009 : 0x03200008); + write32<E>(Buf + 12, 0x25f80000); // addiu $24, $15, %lo(.got.plt entry) + writeMipsHi16<E>(Buf, GotEntryAddr); + writeMipsLo16<E>(Buf + 4, GotEntryAddr); + writeMipsLo16<E>(Buf + 12, GotEntryAddr); +} + +template <class ELFT> +RelExpr MipsTargetInfo<ELFT>::getThunkExpr(RelExpr Expr, uint32_t Type, + const InputFile &File, + const SymbolBody &S) const { + // Any MIPS PIC code function is invoked with its address in register $t9. + // So if we have a branch instruction from non-PIC code to the PIC one + // we cannot make the jump directly and need to create a small stubs + // to save the target function address. + // See page 3-38 ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (Type != R_MIPS_26) + return Expr; + auto *F = dyn_cast<ELFFileBase<ELFT>>(&File); + if (!F) + return Expr; + // If current file has PIC code, LA25 stub is not required. + if (F->getObj().getHeader()->e_flags & EF_MIPS_PIC) + return Expr; + auto *D = dyn_cast<DefinedRegular<ELFT>>(&S); + // LA25 is required if target file has PIC code + // or target symbol is a PIC symbol. + return D && D->isMipsPIC() ? 
R_THUNK_ABS : Expr; +} + +template <class ELFT> +uint64_t MipsTargetInfo<ELFT>::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { + const endianness E = ELFT::TargetEndianness; + switch (Type) { + default: + return 0; + case R_MIPS_32: + case R_MIPS_GPREL32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL32: + return read32<E>(Buf); + case R_MIPS_26: + // FIXME (simon): If the relocation target symbol is not a PLT entry + // we should use another expression for calculation: + // ((A << 2) | (P & 0xf0000000)) >> 2 + return SignExtend64<28>((read32<E>(Buf) & 0x3ffffff) << 2); + case R_MIPS_GPREL16: + case R_MIPS_LO16: + case R_MIPS_PCLO16: + case R_MIPS_TLS_DTPREL_HI16: + case R_MIPS_TLS_DTPREL_LO16: + case R_MIPS_TLS_TPREL_HI16: + case R_MIPS_TLS_TPREL_LO16: + return SignExtend64<16>(read32<E>(Buf)); + case R_MIPS_PC16: + return getPcRelocAddend<E, 16, 2>(Buf); + case R_MIPS_PC19_S2: + return getPcRelocAddend<E, 19, 2>(Buf); + case R_MIPS_PC21_S2: + return getPcRelocAddend<E, 21, 2>(Buf); + case R_MIPS_PC26_S2: + return getPcRelocAddend<E, 26, 2>(Buf); + case R_MIPS_PC32: + return getPcRelocAddend<E, 32, 0>(Buf); + } +} + +static std::pair<uint32_t, uint64_t> +calculateMipsRelChain(uint8_t *Loc, uint32_t Type, uint64_t Val) { + // MIPS N64 ABI packs multiple relocations into the single relocation + // record. In general, all up to three relocations can have arbitrary + // types. In fact, Clang and GCC uses only a few combinations. For now, + // we support two of them. That is allow to pass at least all LLVM + // test suite cases. + // <any relocation> / R_MIPS_SUB / R_MIPS_HI16 | R_MIPS_LO16 + // <any relocation> / R_MIPS_64 / R_MIPS_NONE + // The first relocation is a 'real' relocation which is calculated + // using the corresponding symbol's value. The second and the third + // relocations used to modify result of the first one: extend it to + // 64-bit, extract high or low part etc. 
For details, see part 2.9 Relocation + // at the https://dmz-portal.mips.com/mw/images/8/82/007-4658-001.pdf + uint32_t Type2 = (Type >> 8) & 0xff; + uint32_t Type3 = (Type >> 16) & 0xff; + if (Type2 == R_MIPS_NONE && Type3 == R_MIPS_NONE) + return std::make_pair(Type, Val); + if (Type2 == R_MIPS_64 && Type3 == R_MIPS_NONE) + return std::make_pair(Type2, Val); + if (Type2 == R_MIPS_SUB && (Type3 == R_MIPS_HI16 || Type3 == R_MIPS_LO16)) + return std::make_pair(Type3, -Val); + error(getErrorLocation(Loc) + "unsupported relocations combination " + + Twine(Type)); + return std::make_pair(Type & 0xff, Val); +} + +template <class ELFT> +void MipsTargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, + uint64_t Val) const { + const endianness E = ELFT::TargetEndianness; + // Thread pointer and DRP offsets from the start of TLS data area. + // https://www.linux-mips.org/wiki/NPTL + if (Type == R_MIPS_TLS_DTPREL_HI16 || Type == R_MIPS_TLS_DTPREL_LO16 || + Type == R_MIPS_TLS_DTPREL32 || Type == R_MIPS_TLS_DTPREL64) + Val -= 0x8000; + else if (Type == R_MIPS_TLS_TPREL_HI16 || Type == R_MIPS_TLS_TPREL_LO16 || + Type == R_MIPS_TLS_TPREL32 || Type == R_MIPS_TLS_TPREL64) + Val -= 0x7000; + if (ELFT::Is64Bits || Config->MipsN32Abi) + std::tie(Type, Val) = calculateMipsRelChain(Loc, Type, Val); + switch (Type) { + case R_MIPS_32: + case R_MIPS_GPREL32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL32: + write32<E>(Loc, Val); + break; + case R_MIPS_64: + case R_MIPS_TLS_DTPREL64: + case R_MIPS_TLS_TPREL64: + write64<E>(Loc, Val); + break; + case R_MIPS_26: + write32<E>(Loc, (read32<E>(Loc) & ~0x3ffffff) | ((Val >> 2) & 0x3ffffff)); + break; + case R_MIPS_GOT_DISP: + case R_MIPS_GOT_PAGE: + case R_MIPS_GOT16: + case R_MIPS_GPREL16: + case R_MIPS_TLS_GD: + case R_MIPS_TLS_LDM: + checkInt<16>(Loc, Val, Type); + // fallthrough + case R_MIPS_CALL16: + case R_MIPS_CALL_LO16: + case R_MIPS_GOT_LO16: + case R_MIPS_GOT_OFST: + case R_MIPS_LO16: + case R_MIPS_PCLO16: + case 
R_MIPS_TLS_DTPREL_LO16: + case R_MIPS_TLS_GOTTPREL: + case R_MIPS_TLS_TPREL_LO16: + writeMipsLo16<E>(Loc, Val); + break; + case R_MIPS_CALL_HI16: + case R_MIPS_GOT_HI16: + case R_MIPS_HI16: + case R_MIPS_PCHI16: + case R_MIPS_TLS_DTPREL_HI16: + case R_MIPS_TLS_TPREL_HI16: + writeMipsHi16<E>(Loc, Val); + break; + case R_MIPS_HIGHER: + writeMipsHigher<E>(Loc, Val); + break; + case R_MIPS_HIGHEST: + writeMipsHighest<E>(Loc, Val); + break; + case R_MIPS_JALR: + // Ignore this optimization relocation for now + break; + case R_MIPS_PC16: + applyMipsPcReloc<E, 16, 2>(Loc, Type, Val); + break; + case R_MIPS_PC19_S2: + applyMipsPcReloc<E, 19, 2>(Loc, Type, Val); + break; + case R_MIPS_PC21_S2: + applyMipsPcReloc<E, 21, 2>(Loc, Type, Val); + break; + case R_MIPS_PC26_S2: + applyMipsPcReloc<E, 26, 2>(Loc, Type, Val); + break; + case R_MIPS_PC32: + applyMipsPcReloc<E, 32, 0>(Loc, Type, Val); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +template <class ELFT> +bool MipsTargetInfo<ELFT>::usesOnlyLowPageBits(uint32_t Type) const { + return Type == R_MIPS_LO16 || Type == R_MIPS_GOT_OFST; +} +} +} diff --git a/contrib/llvm/tools/lld/ELF/Target.h b/contrib/llvm/tools/lld/ELF/Target.h new file mode 100644 index 000000000000..752f9cd5ee4e --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Target.h @@ -0,0 +1,117 @@ +//===- Target.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_TARGET_H +#define LLD_ELF_TARGET_H + +#include "InputSection.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELF.h" + +#include <memory> + +namespace lld { +namespace elf { +class InputFile; +class SymbolBody; + +class TargetInfo { // Target-specific hooks; getRelExpr() and relocateOne() are pure virtual. +public: + virtual bool isTlsInitialExecRel(uint32_t Type) const; + virtual bool isTlsLocalDynamicRel(uint32_t Type) const; + virtual bool isTlsGlobalDynamicRel(uint32_t Type) const; + virtual bool isPicRel(uint32_t Type) const { return true; } + virtual uint32_t getDynRel(uint32_t Type) const { return Type; } + virtual void writeGotPltHeader(uint8_t *Buf) const {} + virtual void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {} + virtual void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const; + virtual uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const; + + // If lazy binding is supported, the first entry of the PLT has code + // to call the dynamic linker to resolve PLT entries the first time + // they are called. This function writes that code. + virtual void writePltHeader(uint8_t *Buf) const {} + + virtual void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const {} + + // Returns true if a relocation only uses the low bits of a value such that + // all those bits are in the same page. For example, if the relocation + // only uses the low 12 bits in a system with 4k pages. If this is true, the + // bits will always have the same value at runtime and we don't have to emit + // a dynamic relocation. + virtual bool usesOnlyLowPageBits(uint32_t Type) const; + + // Decide whether a Thunk is needed for the relocation from File + // targeting S. 
Returns one of: + // Expr if there is no Thunk required + // R_THUNK_ABS if thunk is required and expression is absolute + // R_THUNK_PC if thunk is required and expression is pc rel + // R_THUNK_PLT_PC if thunk is required to PLT entry and expression is pc rel + virtual RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, + const InputFile &File, + const SymbolBody &S) const; + virtual RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const = 0; + virtual void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const = 0; + virtual ~TargetInfo(); + + unsigned TlsGdRelaxSkip = 1; + unsigned PageSize = 4096; + unsigned DefaultMaxPageSize = 4096; + + // On FreeBSD x86_64 the first page cannot be mmapped. + // On Linux that is controlled by vm.mmap_min_addr. At least on some x86_64 + // installs that is 65536, so the first 16 pages cannot be used. + // Given that, the smallest value that can be used in here is 0x10000. + uint64_t DefaultImageBase = 0x10000; + + uint32_t CopyRel; + uint32_t GotRel; + uint32_t PltRel; + uint32_t RelativeRel; + uint32_t IRelativeRel; + uint32_t TlsDescRel; + uint32_t TlsGotRel; + uint32_t TlsModuleIndexRel; + uint32_t TlsOffsetRel; + unsigned GotEntrySize = 0; + unsigned GotPltEntrySize = 0; + unsigned PltEntrySize; + unsigned PltHeaderSize; + + // At least on x86_64 positions 1 and 2 are used by the first plt entry + // to support lazy loading. 
+ unsigned GotPltHeaderEntriesNum = 3; + + // Set to 0 for variant 2 + unsigned TcbSize = 0; + + bool NeedsThunks = false; + + virtual RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, + RelExpr Expr) const; + virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; + virtual void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; + virtual void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; + virtual void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; + virtual void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; +}; + +uint64_t getPPC64TocBase(); +uint64_t getAArch64Page(uint64_t Expr); + +extern TargetInfo *Target; +TargetInfo *createTarget(); +} + +std::string toString(uint32_t RelType); +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Threads.h b/contrib/llvm/tools/lld/ELF/Threads.h new file mode 100644 index 000000000000..c03e15253e15 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Threads.h @@ -0,0 +1,90 @@ +//===- Threads.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LLD supports threads to distribute workloads to multiple cores. Using +// multicore is most effective when more than one core is idle. At the +// last step of a build, it is often the case that a linker is the only +// active process on a computer. So, we are naturally interested in using +// threads wisely to reduce latency to deliver results to users. +// +// That said, we don't want to do "too clever" things using threads. +// The correctness of complex multi-threaded algorithms is sometimes extremely +// hard to justify, and they can easily mess up the entire design. 
+// +// Fortunately, when a linker links large programs (when the link time is +// most critical), it spends most of its time working on a massive number of +// small pieces of data of the same kind, and there are opportunities for +// large parallelism there. Here are examples: +// +// - We have hundreds of thousands of input sections that need to be +// copied to a result file at the last step of the link. Once we fix a file +// layout, each section can be copied to its destination and its +// relocations can be applied independently. +// +// - We have tens of millions of small strings when constructing a +// mergeable string section. +// +// For cases such as the former, we can just use parallel_for_each +// instead of std::for_each (or a plain for loop). Because tasks are +// completely independent from each other, we can run them in parallel +// without any coordination between them. That's very easy to understand +// and justify. +// +// For cases such as the latter, we can use parallel algorithms to +// deal with massive data. We have to write code for a tailored algorithm +// for each problem, but the complexity of multi-threading is isolated in +// a single pass and doesn't affect the linker's overall design. +// +// The above approach seems to be working fairly well. As an example, when +// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to +// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my +// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from +// 12.66 seconds to 7.95 seconds). Because of Amdahl's law, the +// speedup is not linear, but as you add more cores, it gets faster. +// +// On a final note, if you are trying to optimize, keep the axiom "don't +// guess, measure!" in mind. Some important passes of the linker are not +// that slow. For example, resolving all symbols is not a very heavy pass, +// although it would be very hard to parallelize it. 
You want to first +// identify a slow pass and then optimize it. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_THREADS_H +#define LLD_ELF_THREADS_H + +#include "Config.h" + +#include "lld/Core/Parallel.h" +#include <algorithm> +#include <functional> + +namespace lld { +namespace elf { + +template <class IterTy, class FuncTy> // Applies Fn to every element of [Begin, End). +void forEach(IterTy Begin, IterTy End, FuncTy Fn) { + if (!Config->Threads) // Threads disabled: plain sequential traversal. + std::for_each(Begin, End, Fn); + else + parallel_for_each(Begin, End, Fn); +} + +inline void forLoop(size_t Begin, size_t End, std::function<void(size_t)> Fn) { // Calls Fn(Idx) for each Idx in [Begin, End). + if (!Config->Threads) { + for (size_t Idx = Begin; Idx < End; ++Idx) + Fn(Idx); + } else { + parallel_for(Begin, End, Fn); + } +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Thunks.cpp b/contrib/llvm/tools/lld/ELF/Thunks.cpp new file mode 100644 index 000000000000..397a0ee66319 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Thunks.cpp @@ -0,0 +1,275 @@ +//===- Thunks.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains Thunk subclasses. +// +// A thunk is a small piece of code written after an input section +// which is used to jump between "incompatible" functions +// such as MIPS PIC and non-PIC or ARM non-Thumb and Thumb functions. +// +// If a jump target is too far and its address doesn't fit in a +// short jump instruction, we need to create a thunk too, but we +// haven't supported it yet. +// +// i386 and x86-64 don't need thunks. 
+// +//===---------------------------------------------------------------------===// + +#include "Thunks.h" +#include "Config.h" +#include "Error.h" +#include "InputSection.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Symbols.h" +#include "Target.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <cstdint> +#include <cstring> + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; + +namespace lld { +namespace elf { + +namespace { + +// Specific ARM Thunk implementations. The naming convention is: +// Source State, TargetState, Target Requirement, ABS or PI, Range +template <class ELFT> +class ARMToThumbV7ABSLongThunk final : public Thunk<ELFT> { +public: + ARMToThumbV7ABSLongThunk(const SymbolBody &Dest, + const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 12; } + void writeTo(uint8_t *Buf) const override; +}; + +template <class ELFT> class ARMToThumbV7PILongThunk final : public Thunk<ELFT> { +public: + ARMToThumbV7PILongThunk(const SymbolBody &Dest, + const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 16; } + void writeTo(uint8_t *Buf) const override; +}; + +template <class ELFT> +class ThumbToARMV7ABSLongThunk final : public Thunk<ELFT> { +public: + ThumbToARMV7ABSLongThunk(const SymbolBody &Dest, + const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 10; } + void writeTo(uint8_t *Buf) const override; +}; + +template <class ELFT> class ThumbToARMV7PILongThunk final : public Thunk<ELFT> { +public: + ThumbToARMV7PILongThunk(const SymbolBody &Dest, + const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 12; } + void 
writeTo(uint8_t *Buf) const override; +}; + +// MIPS LA25 thunk +template <class ELFT> class MipsThunk final : public Thunk<ELFT> { +public: + MipsThunk(const SymbolBody &Dest, const InputSection<ELFT> &Owner) + : Thunk<ELFT>(Dest, Owner) {} + + uint32_t size() const override { return 16; } + void writeTo(uint8_t *Buf) const override; +}; + +} // end anonymous namespace + +// ARM Target Thunks +template <class ELFT> static uint64_t getARMThunkDestVA(const SymbolBody &S) { + uint64_t V = S.isInPlt() ? S.getPltVA<ELFT>() : S.getVA<ELFT>(); + return SignExtend64<32>(V); +} + +template <class ELFT> +void ARMToThumbV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const { + const uint8_t Data[] = { + 0x00, 0xc0, 0x00, 0xe3, // movw ip,:lower16:S + 0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S + 0x1c, 0xff, 0x2f, 0xe1, // bx ip + }; + uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf, R_ARM_MOVW_ABS_NC, S); + Target->relocateOne(Buf + 4, R_ARM_MOVT_ABS, S); +} + +template <class ELFT> +void ThumbToARMV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const { + const uint8_t Data[] = { + 0x40, 0xf2, 0x00, 0x0c, // movw ip, :lower16:S + 0xc0, 0xf2, 0x00, 0x0c, // movt ip, :upper16:S + 0x60, 0x47, // bx ip + }; + uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf, R_ARM_THM_MOVW_ABS_NC, S); + Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_ABS, S); +} + +template <class ELFT> +void ARMToThumbV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const { + const uint8_t Data[] = { + 0xf0, 0xcf, 0x0f, 0xe3, // P: movw ip,:lower16:S - (P + (L1-P) +8) + 0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S - (P + (L1-P) +8) + 0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc + 0x1c, 0xff, 0x2f, 0xe1, // bx r12 + }; + uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + uint64_t P = this->getVA(); // MOVW and MOVT must encode halves of the same offset S - (L1 + 8) = S - P - 16. + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf, R_ARM_MOVW_PREL_NC, 
S - P - 16); + Target->relocateOne(Buf + 4, R_ARM_MOVT_PREL, S - P - 16); +} + +template <class ELFT> +void ThumbToARMV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const { + const uint8_t Data[] = { + 0x4f, 0xf6, 0xf4, 0x7c, // P: movw ip,:lower16:S - (P + (L1-P) + 4) + 0xc0, 0xf2, 0x00, 0x0c, // movt ip,:upper16:S - (P + (L1-P) + 4) + 0xfc, 0x44, // L1: add r12, pc + 0x60, 0x47, // bx r12 + }; + uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + uint64_t P = this->getVA(); // MOVW and MOVT must encode halves of the same offset S - (L1 + 4) = S - P - 12. + memcpy(Buf, Data, sizeof(Data)); + Target->relocateOne(Buf, R_ARM_THM_MOVW_PREL_NC, S - P - 12); + Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_PREL, S - P - 12); +} + +// Write MIPS LA25 thunk code to call PIC function from the non-PIC one. +template <class ELFT> void MipsThunk<ELFT>::writeTo(uint8_t *Buf) const { + const endianness E = ELFT::TargetEndianness; + + uint64_t S = this->Destination.template getVA<ELFT>(); + write32<E>(Buf, 0x3c190000); // lui $25, %hi(func) + write32<E>(Buf + 4, 0x08000000 | (S >> 2)); // j func + write32<E>(Buf + 8, 0x27390000); // addiu $25, $25, %lo(func) + write32<E>(Buf + 12, 0x00000000); // nop + Target->relocateOne(Buf, R_MIPS_HI16, S); + Target->relocateOne(Buf + 8, R_MIPS_LO16, S); +} + +template <class ELFT> +Thunk<ELFT>::Thunk(const SymbolBody &D, const InputSection<ELFT> &O) + : Destination(D), Owner(O), Offset(O.getThunkOff() + O.getThunksSize()) {} + +template <class ELFT> typename ELFT::uint Thunk<ELFT>::getVA() const { + return Owner.OutSec->Addr + Owner.OutSecOff + Offset; +} + +template <class ELFT> Thunk<ELFT>::~Thunk() = default; + +// Creates a thunk for Thumb-ARM interworking. +template <class ELFT> +static Thunk<ELFT> *createThunkArm(uint32_t Reloc, SymbolBody &S, + InputSection<ELFT> &IS) { + // ARM relocations need ARM to Thumb interworking Thunks. + // Thumb relocations need Thumb to ARM interworking Thunks. + // Use position independent Thunks if we require position independent code. 
+ switch (Reloc) { + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_JUMP24: + if (Config->Pic) + return new (BAlloc) ARMToThumbV7PILongThunk<ELFT>(S, IS); + return new (BAlloc) ARMToThumbV7ABSLongThunk<ELFT>(S, IS); + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + if (Config->Pic) + return new (BAlloc) ThumbToARMV7PILongThunk<ELFT>(S, IS); + return new (BAlloc) ThumbToARMV7ABSLongThunk<ELFT>(S, IS); + } + fatal("unrecognized relocation type"); +} + +template <class ELFT> +static void addThunkARM(uint32_t Reloc, SymbolBody &S, InputSection<ELFT> &IS) { + // Only one Thunk supported per symbol. + if (S.hasThunk<ELFT>()) + return; + + // ARM Thunks are added to the same InputSection as the relocation. This + // isn't strictly necessary but it makes it more likely that a limited range + // branch can reach the Thunk, and it makes Thunks to the PLT section easier. + Thunk<ELFT> *T = createThunkArm(Reloc, S, IS); + IS.addThunk(T); + if (auto *Sym = dyn_cast<DefinedRegular<ELFT>>(&S)) + Sym->ThunkData = T; + else if (auto *Sym = dyn_cast<SharedSymbol<ELFT>>(&S)) + Sym->ThunkData = T; + else if (auto *Sym = dyn_cast<Undefined<ELFT>>(&S)) + Sym->ThunkData = T; + else + fatal("symbol not DefinedRegular or Shared"); +} + +template <class ELFT> +static void addThunkMips(uint32_t RelocType, SymbolBody &S, + InputSection<ELFT> &IS) { + // Only one Thunk supported per symbol. + if (S.hasThunk<ELFT>()) + return; + + // Mips Thunks are added to the InputSection defining S. 
+ auto *R = cast<DefinedRegular<ELFT>>(&S); + auto *Sec = cast<InputSection<ELFT>>(R->Section); + auto *T = new (BAlloc) MipsThunk<ELFT>(S, *Sec); + Sec->addThunk(T); + R->ThunkData = T; +} + +template <class ELFT> +void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &IS) { + if (Config->EMachine == EM_ARM) + addThunkARM<ELFT>(RelocType, S, IS); + else if (Config->EMachine == EM_MIPS) + addThunkMips<ELFT>(RelocType, S, IS); + else + llvm_unreachable("add Thunk only supported for ARM and Mips"); +} + +template void addThunk<ELF32LE>(uint32_t, SymbolBody &, + InputSection<ELF32LE> &); +template void addThunk<ELF32BE>(uint32_t, SymbolBody &, + InputSection<ELF32BE> &); +template void addThunk<ELF64LE>(uint32_t, SymbolBody &, + InputSection<ELF64LE> &); +template void addThunk<ELF64BE>(uint32_t, SymbolBody &, + InputSection<ELF64BE> &); + +template class Thunk<ELF32LE>; +template class Thunk<ELF32BE>; +template class Thunk<ELF64LE>; +template class Thunk<ELF64BE>; + +} // end namespace elf +} // end namespace lld diff --git a/contrib/llvm/tools/lld/ELF/Thunks.h b/contrib/llvm/tools/lld/ELF/Thunks.h new file mode 100644 index 000000000000..b937d7918491 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Thunks.h @@ -0,0 +1,56 @@ +//===- Thunks.h --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_THUNKS_H +#define LLD_ELF_THUNKS_H + +#include "Relocations.h" + +namespace lld { +namespace elf { +class SymbolBody; +template <class ELFT> class InputSection; + +// Class to describe an instance of a Thunk. +// A Thunk is a code-sequence inserted by the linker in between a caller and +// the callee. 
The relocation to the callee is redirected to the Thunk, which +// after executing transfers control to the callee. Typical uses of Thunks +// include transferring control from non-pi to pi and changing state on +// targets like ARM. +// +// Thunks can be created for DefinedRegular and Shared Symbols. The Thunk +// is stored in a field of the Symbol Destination. +// Thunks to be written to an InputSection are recorded by the InputSection. +template <class ELFT> class Thunk { + typedef typename ELFT::uint uintX_t; + +public: + Thunk(const SymbolBody &Destination, const InputSection<ELFT> &Owner); + virtual ~Thunk(); + + virtual uint32_t size() const { return 0; } + virtual void writeTo(uint8_t *Buf) const {} + uintX_t getVA() const; + +protected: + const SymbolBody &Destination; + const InputSection<ELFT> &Owner; + uint64_t Offset; +}; + +// For a Relocation to symbol S from InputSection Src, create a Thunk and +// update the fields of S and the InputSection that the Thunk body will be +// written to. At present there are implementations for ARM and Mips Thunks. +template <class ELFT> +void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &Src); + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Writer.cpp b/contrib/llvm/tools/lld/ELF/Writer.cpp new file mode 100644 index 000000000000..154de8cf6d18 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Writer.cpp @@ -0,0 +1,1723 @@ +//===- Writer.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Writer.h" +#include "Config.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Relocations.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include <climits> +#include <thread> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +namespace { +// The writer writes a SymbolTable result to a file. +template <class ELFT> class Writer { +public: + typedef typename ELFT::uint uintX_t; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Ehdr Elf_Ehdr; + typedef typename ELFT::Phdr Elf_Phdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::SymRange Elf_Sym_Range; + typedef typename ELFT::Rela Elf_Rela; + void run(); + +private: + void createSyntheticSections(); + void copyLocalSymbols(); + void addReservedSymbols(); + void addInputSec(InputSectionBase<ELFT> *S); + void createSections(); + void forEachRelSec(std::function<void(InputSectionBase<ELFT> &)> Fn); + void sortSections(); + void finalizeSections(); + void addPredefinedSections(); + + std::vector<PhdrEntry> createPhdrs(); + void removeEmptyPTLoad(); + void addPtArmExid(std::vector<PhdrEntry> &Phdrs); + void assignAddresses(); + void assignFileOffsets(); + void assignFileOffsetsBinary(); + void setPhdrs(); + void fixHeaders(); + void fixSectionAlignments(); + void fixAbsoluteSymbols(); + void openFile(); + void writeHeader(); + void writeSections(); + void writeSectionsBinary(); + void writeBuildId(); + + std::unique_ptr<FileOutputBuffer> Buffer; + + 
std::vector<OutputSectionBase *> OutputSections; + OutputSectionFactory<ELFT> Factory; + + void addRelIpltSymbols(); + void addStartEndSymbols(); + void addStartStopSymbols(OutputSectionBase *Sec); + uintX_t getEntryAddr(); + OutputSectionBase *findSection(StringRef Name); + + std::vector<PhdrEntry> Phdrs; + + uintX_t FileSize; + uintX_t SectionHeaderOff; + bool AllocateHeader = true; +}; +} // anonymous namespace + +StringRef elf::getOutputSectionName(StringRef Name) { + if (Config->Relocatable) + return Name; + + for (StringRef V : + {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.", + ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", + ".gcc_except_table.", ".tdata.", ".ARM.exidx."}) { + StringRef Prefix = V.drop_back(); + if (Name.startswith(V) || Name == Prefix) + return Prefix; + } + + // CommonSection is identified as "COMMON" in linker scripts. + // By default, it should go to .bss section. + if (Name == "COMMON") + return ".bss"; + + // ".zdebug_" is a prefix for ZLIB-compressed sections. + // Because we decompressed input sections, we want to remove 'z'. 
+ if (Name.startswith(".zdebug_")) + return Saver.save(Twine(".") + Name.substr(2)); + return Name; +} + +template <class ELFT> void elf::reportDiscarded(InputSectionBase<ELFT> *IS) { + if (!Config->PrintGcSections) + return; + errs() << "removing unused section from '" << IS->Name << "' in file '" + << IS->getFile()->getName() << "'\n"; +} + +template <class ELFT> static bool needsInterpSection() { + return !Symtab<ELFT>::X->getSharedFiles().empty() && + !Config->DynamicLinker.empty() && + !Script<ELFT>::X->ignoreInterpSection(); +} + +template <class ELFT> void elf::writeResult() { Writer<ELFT>().run(); } + +template <class ELFT> void Writer<ELFT>::removeEmptyPTLoad() { + auto I = std::remove_if(Phdrs.begin(), Phdrs.end(), [&](const PhdrEntry &P) { + if (P.p_type != PT_LOAD) + return false; + if (!P.First) + return true; + uintX_t Size = P.Last->Addr + P.Last->Size - P.First->Addr; + return Size == 0; + }); + Phdrs.erase(I, Phdrs.end()); +} + +// The main function of the writer. +template <class ELFT> void Writer<ELFT>::run() { + // Create linker-synthesized sections such as .got or .plt. + // Such sections are of type input section. + createSyntheticSections(); + + // We need to create some reserved symbols such as _end. Create them. + if (!Config->Relocatable) + addReservedSymbols(); + + // Some architectures use small displacements for jump instructions. + // It is linker's responsibility to create thunks containing long + // jump instructions if jump targets are too far. Create thunks. + if (Target->NeedsThunks) + forEachRelSec(createThunks<ELFT>); + + // Create output sections. + Script<ELFT>::X->OutputSections = &OutputSections; + if (ScriptConfig->HasSections) { + // If linker script contains SECTIONS commands, let it create sections. + Script<ELFT>::X->processCommands(Factory); + + // Linker scripts may have left some input sections unassigned. + // Assign such sections using the default rule. 
+ Script<ELFT>::X->addOrphanSections(Factory); + } else { + // If linker script does not contain SECTIONS commands, create + // output sections by default rules. We still need to give the + // linker script a chance to run, because it might contain + // non-SECTIONS commands such as ASSERT. + createSections(); + Script<ELFT>::X->processCommands(Factory); + } + + if (Config->Discard != DiscardPolicy::All) + copyLocalSymbols(); + + // Now that we have a complete set of output sections. This function + // completes section contents. For example, we need to add strings + // to the string table, and add entries to .got and .plt. + // finalizeSections does that. + finalizeSections(); + if (ErrorCount) + return; + + if (Config->Relocatable) { + assignFileOffsets(); + } else { + if (ScriptConfig->HasSections) { + Script<ELFT>::X->assignAddresses(Phdrs); + } else { + fixSectionAlignments(); + assignAddresses(); + } + + // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a + // 0 sized region. This has to be done late since only after assignAddresses + // we know the size of the sections. + removeEmptyPTLoad(); + + if (!Config->OFormatBinary) + assignFileOffsets(); + else + assignFileOffsetsBinary(); + + setPhdrs(); + fixAbsoluteSymbols(); + } + + // Write the result down to a file. + openFile(); + if (ErrorCount) + return; + if (!Config->OFormatBinary) { + writeHeader(); + writeSections(); + } else { + writeSectionsBinary(); + } + + // Backfill .note.gnu.build-id section content. This is done at last + // because the content is usually a hash value of the entire output file. + writeBuildId(); + if (ErrorCount) + return; + + if (auto EC = Buffer->commit()) + error(EC, "failed to write to the output file"); + + // Flush the output streams and exit immediately. A full shutdown + // is a good test that we are keeping track of all allocated memory, + // but actually freeing it is a waste of time in a regular linker run. 
+ if (Config->ExitEarly) + exitLld(0); +} + +// Initialize Out<ELFT> members. +template <class ELFT> void Writer<ELFT>::createSyntheticSections() { + // Initialize all pointers with NULL. This is needed because + // you can call lld::elf::main more than once as a library. + memset(&Out<ELFT>::First, 0, sizeof(Out<ELFT>)); + + // Create singleton output sections. + Out<ELFT>::Bss = + make<OutputSection<ELFT>>(".bss", SHT_NOBITS, SHF_ALLOC | SHF_WRITE); + In<ELFT>::DynStrTab = make<StringTableSection<ELFT>>(".dynstr", true); + In<ELFT>::Dynamic = make<DynamicSection<ELFT>>(); + Out<ELFT>::EhFrame = make<EhOutputSection<ELFT>>(); + In<ELFT>::RelaDyn = make<RelocationSection<ELFT>>( + Config->Rela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc); + In<ELFT>::ShStrTab = make<StringTableSection<ELFT>>(".shstrtab", false); + + Out<ELFT>::ElfHeader = make<OutputSectionBase>("", 0, SHF_ALLOC); + Out<ELFT>::ElfHeader->Size = sizeof(Elf_Ehdr); + Out<ELFT>::ProgramHeaders = make<OutputSectionBase>("", 0, SHF_ALLOC); + Out<ELFT>::ProgramHeaders->updateAlignment(sizeof(uintX_t)); + + if (needsInterpSection<ELFT>()) { + In<ELFT>::Interp = createInterpSection<ELFT>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Interp); + } else { + In<ELFT>::Interp = nullptr; + } + + if (!Config->Relocatable) + Symtab<ELFT>::X->Sections.push_back(createCommentSection<ELFT>()); + + if (Config->Strip != StripPolicy::All) { + In<ELFT>::StrTab = make<StringTableSection<ELFT>>(".strtab", false); + In<ELFT>::SymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::StrTab); + } + + if (Config->BuildId != BuildIdKind::None) { + In<ELFT>::BuildId = make<BuildIdSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::BuildId); + } + + InputSection<ELFT> *Common = createCommonSection<ELFT>(); + if (!Common->Data.empty()) { + In<ELFT>::Common = Common; + Symtab<ELFT>::X->Sections.push_back(Common); + } + + // Add MIPS-specific sections. 
+ bool HasDynSymTab = !Symtab<ELFT>::X->getSharedFiles().empty() || Config->Pic; + if (Config->EMachine == EM_MIPS) { + if (!Config->Shared && HasDynSymTab) { + In<ELFT>::MipsRldMap = make<MipsRldMapSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::MipsRldMap); + } + if (auto *Sec = MipsAbiFlagsSection<ELFT>::create()) + Symtab<ELFT>::X->Sections.push_back(Sec); + if (auto *Sec = MipsOptionsSection<ELFT>::create()) + Symtab<ELFT>::X->Sections.push_back(Sec); + if (auto *Sec = MipsReginfoSection<ELFT>::create()) + Symtab<ELFT>::X->Sections.push_back(Sec); + } + + if (HasDynSymTab) { + In<ELFT>::DynSymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::DynStrTab); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::DynSymTab); + + In<ELFT>::VerSym = make<VersionTableSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerSym); + + if (!Config->VersionDefinitions.empty()) { + In<ELFT>::VerDef = make<VersionDefinitionSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerDef); + } + + In<ELFT>::VerNeed = make<VersionNeedSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerNeed); + + if (Config->GnuHash) { + In<ELFT>::GnuHashTab = make<GnuHashTableSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GnuHashTab); + } + + if (Config->SysvHash) { + In<ELFT>::HashTab = make<HashTableSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::HashTab); + } + + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Dynamic); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::DynStrTab); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaDyn); + } + + // Add .got. MIPS' .got is so different from the other archs, + // it has its own class. 
+ if (Config->EMachine == EM_MIPS) { + In<ELFT>::MipsGot = make<MipsGotSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::MipsGot); + } else { + In<ELFT>::Got = make<GotSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Got); + } + + In<ELFT>::GotPlt = make<GotPltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GotPlt); + In<ELFT>::IgotPlt = make<IgotPltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::IgotPlt); + + if (Config->GdbIndex) { + In<ELFT>::GdbIndex = make<GdbIndexSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GdbIndex); + } + + // We always need to add rel[a].plt to output if it has entries. + // Even for static linking it can contain R_[*]_IRELATIVE relocations. + In<ELFT>::RelaPlt = make<RelocationSection<ELFT>>( + Config->Rela ? ".rela.plt" : ".rel.plt", false /*Sort*/); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaPlt); + + // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure + // that the IRelative relocations are processed last by the dynamic loader + In<ELFT>::RelaIplt = make<RelocationSection<ELFT>>( + (Config->EMachine == EM_ARM) ? ".rel.dyn" : In<ELFT>::RelaPlt->Name, + false /*Sort*/); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaIplt); + + In<ELFT>::Plt = make<PltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Plt); + In<ELFT>::Iplt = make<IpltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Iplt); + + if (Config->EhFrameHdr) { + In<ELFT>::EhFrameHdr = make<EhFrameHeader<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::EhFrameHdr); + } +} + +template <class ELFT> +static bool shouldKeepInSymtab(InputSectionBase<ELFT> *Sec, StringRef SymName, + const SymbolBody &B) { + if (B.isFile()) + return false; + + // We keep sections in symtab for relocatable output. + if (B.isSection()) + return Config->Relocatable; + + // If sym references a section in a discarded group, don't keep it. 
+ if (Sec == &InputSection<ELFT>::Discarded) + return false; + + if (Config->Discard == DiscardPolicy::None) + return true; + + // In ELF assembly .L symbols are normally discarded by the assembler. + // If the assembler fails to do so, the linker discards them if + // * --discard-locals is used. + // * The symbol is in a SHF_MERGE section, which is normally the reason for + // the assembler keeping the .L symbol. + if (!SymName.startswith(".L") && !SymName.empty()) + return true; + + if (Config->Discard == DiscardPolicy::Locals) + return false; + + return !Sec || !(Sec->Flags & SHF_MERGE); +} + +template <class ELFT> static bool includeInSymtab(const SymbolBody &B) { + if (!B.isLocal() && !B.symbol()->IsUsedInRegularObj) + return false; + + // If --retain-symbols-file is given, we'll keep only symbols listed in that + // file. + if (Config->Discard == DiscardPolicy::RetainFile && + !Config->RetainSymbolsFile.count(B.getName())) + return false; + + if (auto *D = dyn_cast<DefinedRegular<ELFT>>(&B)) { + // Always include absolute symbols. + if (!D->Section) + return true; + // Exclude symbols pointing to garbage-collected sections. + if (!D->Section->Live) + return false; + if (auto *S = dyn_cast<MergeInputSection<ELFT>>(D->Section)) + if (!S->getSectionPiece(D->Value)->Live) + return false; + } + return true; +} + +// Local symbols are not in the linker's symbol table. This function scans +// each object file's symbol table to copy local symbols to the output. +template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { + if (!In<ELFT>::SymTab) + return; + for (elf::ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) { + for (SymbolBody *B : F->getLocalSymbols()) { + if (!B->IsLocal) + fatal(toString(F) + + ": broken object: getLocalSymbols returns a non-local symbol"); + auto *DR = dyn_cast<DefinedRegular<ELFT>>(B); + + // No reason to keep local undefined symbol in symtab. 
+ if (!DR) + continue; + if (!includeInSymtab<ELFT>(*B)) + continue; + + InputSectionBase<ELFT> *Sec = DR->Section; + if (!shouldKeepInSymtab<ELFT>(Sec, B->getName(), *B)) + continue; + ++In<ELFT>::SymTab->NumLocals; + if (Config->Relocatable) + B->DynsymIndex = In<ELFT>::SymTab->NumLocals; + F->KeptLocalSyms.push_back(std::make_pair( + DR, In<ELFT>::SymTab->StrTabSec.addString(B->getName()))); + } + } +} + +// PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections that +// we would like to make sure appear is a specific order to maximize their +// coverage by a single signed 16-bit offset from the TOC base pointer. +// Conversely, the special .tocbss section should be first among all SHT_NOBITS +// sections. This will put it next to the loaded special PPC64 sections (and, +// thus, within reach of the TOC base pointer). +static int getPPC64SectionRank(StringRef SectionName) { + return StringSwitch<int>(SectionName) + .Case(".tocbss", 0) + .Case(".branch_lt", 2) + .Case(".toc", 3) + .Case(".toc1", 4) + .Case(".opd", 5) + .Default(1); +} + +template <class ELFT> bool elf::isRelroSection(const OutputSectionBase *Sec) { + if (!Config->ZRelro) + return false; + uint64_t Flags = Sec->Flags; + if (!(Flags & SHF_ALLOC) || !(Flags & SHF_WRITE)) + return false; + if (Flags & SHF_TLS) + return true; + uint32_t Type = Sec->Type; + if (Type == SHT_INIT_ARRAY || Type == SHT_FINI_ARRAY || + Type == SHT_PREINIT_ARRAY) + return true; + if (Sec == In<ELFT>::GotPlt->OutSec) + return Config->ZNow; + if (Sec == In<ELFT>::Dynamic->OutSec) + return true; + if (In<ELFT>::Got && Sec == In<ELFT>::Got->OutSec) + return true; + if (In<ELFT>::MipsGot && Sec == In<ELFT>::MipsGot->OutSec) + return true; + StringRef S = Sec->getName(); + return S == ".data.rel.ro" || S == ".ctors" || S == ".dtors" || S == ".jcr" || + S == ".eh_frame" || S == ".openbsd.randomdata"; +} + +template <class ELFT> +static bool compareSectionsNonScript(const OutputSectionBase *A, + const 
OutputSectionBase *B) { + // Put .interp first because some loaders want to see that section + // on the first page of the executable file when loaded into memory. + bool AIsInterp = A->getName() == ".interp"; + bool BIsInterp = B->getName() == ".interp"; + if (AIsInterp != BIsInterp) + return AIsInterp; + + // Allocatable sections go first to reduce the total PT_LOAD size and + // so debug info doesn't change addresses in actual code. + bool AIsAlloc = A->Flags & SHF_ALLOC; + bool BIsAlloc = B->Flags & SHF_ALLOC; + if (AIsAlloc != BIsAlloc) + return AIsAlloc; + + // We don't have any special requirements for the relative order of two non + // allocatable sections. + if (!AIsAlloc) + return false; + + // We want to put section specified by -T option first, so we + // can start assigning VA starting from them later. + auto AAddrSetI = Config->SectionStartMap.find(A->getName()); + auto BAddrSetI = Config->SectionStartMap.find(B->getName()); + bool AHasAddrSet = AAddrSetI != Config->SectionStartMap.end(); + bool BHasAddrSet = BAddrSetI != Config->SectionStartMap.end(); + if (AHasAddrSet != BHasAddrSet) + return AHasAddrSet; + if (AHasAddrSet) + return AAddrSetI->second < BAddrSetI->second; + + // We want the read only sections first so that they go in the PT_LOAD + // covering the program headers at the start of the file. + bool AIsWritable = A->Flags & SHF_WRITE; + bool BIsWritable = B->Flags & SHF_WRITE; + if (AIsWritable != BIsWritable) + return BIsWritable; + + if (!Config->SingleRoRx) { + // For a corresponding reason, put non exec sections first (the program + // header PT_LOAD is not executable). + // We only do that if we are not using linker scripts, since with linker + // scripts ro and rx sections are in the same PT_LOAD, so their relative + // order is not important. The same applies for -no-rosegment. 
+ bool AIsExec = A->Flags & SHF_EXECINSTR; + bool BIsExec = B->Flags & SHF_EXECINSTR; + if (AIsExec != BIsExec) + return BIsExec; + } + + // If we got here we know that both A and B are in the same PT_LOAD. + + // The TLS initialization block needs to be a single contiguous block in a R/W + // PT_LOAD, so stick TLS sections directly before R/W sections. The TLS NOBITS + // sections are placed here as they don't take up virtual address space in the + // PT_LOAD. + bool AIsTls = A->Flags & SHF_TLS; + bool BIsTls = B->Flags & SHF_TLS; + if (AIsTls != BIsTls) + return AIsTls; + + // The next requirement we have is to put nobits sections last. The + // reason is that the only thing the dynamic linker will see about + // them is a p_memsz that is larger than p_filesz. Seeing that it + // zeros the end of the PT_LOAD, so that has to correspond to the + // nobits sections. + bool AIsNoBits = A->Type == SHT_NOBITS; + bool BIsNoBits = B->Type == SHT_NOBITS; + if (AIsNoBits != BIsNoBits) + return BIsNoBits; + + // We place RelRo section before plain r/w ones. + bool AIsRelRo = isRelroSection<ELFT>(A); + bool BIsRelRo = isRelroSection<ELFT>(B); + if (AIsRelRo != BIsRelRo) + return AIsRelRo; + + // Some architectures have additional ordering restrictions for sections + // within the same PT_LOAD. + if (Config->EMachine == EM_PPC64) + return getPPC64SectionRank(A->getName()) < + getPPC64SectionRank(B->getName()); + + return false; +} + +// Output section ordering is determined by this function. +template <class ELFT> +static bool compareSections(const OutputSectionBase *A, + const OutputSectionBase *B) { + // For now, put sections mentioned in a linker script first. + int AIndex = Script<ELFT>::X->getSectionIndex(A->getName()); + int BIndex = Script<ELFT>::X->getSectionIndex(B->getName()); + bool AInScript = AIndex != INT_MAX; + bool BInScript = BIndex != INT_MAX; + if (AInScript != BInScript) + return AInScript; + // If both are in the script, use that order. 
+ if (AInScript) + return AIndex < BIndex; + + return compareSectionsNonScript<ELFT>(A, B); +} + +// Program header entry +PhdrEntry::PhdrEntry(unsigned Type, unsigned Flags) { + p_type = Type; + p_flags = Flags; +} + +void PhdrEntry::add(OutputSectionBase *Sec) { + Last = Sec; + if (!First) + First = Sec; + p_align = std::max(p_align, Sec->Addralign); + if (p_type == PT_LOAD) + Sec->FirstInPtLoad = First; +} + +template <class ELFT> +static void addOptionalSynthetic(StringRef Name, OutputSectionBase *Sec, + typename ELFT::uint Val, + uint8_t StOther = STV_HIDDEN) { + if (SymbolBody *S = Symtab<ELFT>::X->find(Name)) + if (S->isUndefined() || S->isShared()) + Symtab<ELFT>::X->addSynthetic(Name, Sec, Val, StOther); +} + +template <class ELFT> +static Symbol *addRegular(StringRef Name, InputSectionBase<ELFT> *Sec, + typename ELFT::uint Value) { + // The linker generated symbols are added as STB_WEAK to allow user defined + // ones to override them. + return Symtab<ELFT>::X->addRegular(Name, STV_HIDDEN, STT_NOTYPE, Value, + /*Size=*/0, STB_WEAK, Sec, + /*File=*/nullptr); +} + +template <class ELFT> +static Symbol *addOptionalRegular(StringRef Name, InputSectionBase<ELFT> *IS, + typename ELFT::uint Value) { + SymbolBody *S = Symtab<ELFT>::X->find(Name); + if (!S) + return nullptr; + if (!S->isUndefined() && !S->isShared()) + return S->symbol(); + return addRegular(Name, IS, Value); +} + +// The beginning and the ending of .rel[a].plt section are marked +// with __rel[a]_iplt_{start,end} symbols if it is a statically linked +// executable. The runtime needs these symbols in order to resolve +// all IRELATIVE relocs on startup. For dynamic executables, we don't +// need these symbols, since IRELATIVE relocs are resolved through GOT +// and PLT. For details, see http://www.airs.com/blog/archives/403. +template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() { + if (In<ELFT>::DynSymTab) + return; + StringRef S = Config->Rela ? 
"__rela_iplt_start" : "__rel_iplt_start"; + addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, 0); + + S = Config->Rela ? "__rela_iplt_end" : "__rel_iplt_end"; + addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, -1); +} + +// The linker is expected to define some symbols depending on +// the linking result. This function defines such symbols. +template <class ELFT> void Writer<ELFT>::addReservedSymbols() { + if (Config->EMachine == EM_MIPS) { + // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer + // so that it points to an absolute address which by default is relative + // to GOT. Default offset is 0x7ff0. + // See "Global Data Symbols" in Chapter 6 in the following document: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + ElfSym<ELFT>::MipsGp = + Symtab<ELFT>::X->addAbsolute("_gp", STV_HIDDEN, STB_LOCAL); + + // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between + // start of function and 'gp' pointer into GOT. To simplify relocation + // calculation we assign _gp value to it and calculate corresponding + // relocations as relative to this value. + if (Symtab<ELFT>::X->find("_gp_disp")) + ElfSym<ELFT>::MipsGpDisp = + Symtab<ELFT>::X->addAbsolute("_gp_disp", STV_HIDDEN, STB_LOCAL); + + // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' + // pointer. This symbol is used in the code generated by .cpload pseudo-op + // in case of using -mno-shared option. + // https://sourceware.org/ml/binutils/2004-12/msg00094.html + if (Symtab<ELFT>::X->find("__gnu_local_gp")) + ElfSym<ELFT>::MipsLocalGp = + Symtab<ELFT>::X->addAbsolute("__gnu_local_gp", STV_HIDDEN, STB_LOCAL); + } + + // In the assembly for 32 bit x86 the _GLOBAL_OFFSET_TABLE_ symbol + // is magical and is used to produce a R_386_GOTPC relocation. + // The R_386_GOTPC relocation value doesn't actually depend on the + // symbol value, so it could use an index of STN_UNDEF which, according + // to the spec, means the symbol value is 0. 
+ // Unfortunately both gas and MC keep the _GLOBAL_OFFSET_TABLE_ symbol in + // the object file. + // The situation is even stranger on x86_64 where the assembly doesn't + // need the magical symbol, but gas still puts _GLOBAL_OFFSET_TABLE_ as + // an undefined symbol in the .o files. + // Given that the symbol is effectively unused, we just create a dummy + // hidden one to avoid the undefined symbol error. + Symtab<ELFT>::X->addIgnored("_GLOBAL_OFFSET_TABLE_"); + + // __tls_get_addr is defined by the dynamic linker for dynamic ELFs. For + // static linking the linker is required to optimize away any references to + // __tls_get_addr, so it's not defined anywhere. Create a hidden definition + // to avoid the undefined symbol error. As usual special cases are ARM and + // MIPS - the libc for these targets defines __tls_get_addr itself because + // there are no TLS optimizations for these targets. + if (!In<ELFT>::DynSymTab && + (Config->EMachine != EM_MIPS && Config->EMachine != EM_ARM)) + Symtab<ELFT>::X->addIgnored("__tls_get_addr"); + + // If linker script do layout we do not need to create any standart symbols. + if (ScriptConfig->HasSections) + return; + + ElfSym<ELFT>::EhdrStart = Symtab<ELFT>::X->addIgnored("__ehdr_start"); + + auto Define = [this](StringRef S, DefinedRegular<ELFT> *&Sym1, + DefinedRegular<ELFT> *&Sym2) { + Sym1 = Symtab<ELFT>::X->addIgnored(S, STV_DEFAULT); + + // The name without the underscore is not a reserved name, + // so it is defined only when there is a reference against it. + assert(S.startswith("_")); + S = S.substr(1); + if (SymbolBody *B = Symtab<ELFT>::X->find(S)) + if (B->isUndefined()) + Sym2 = Symtab<ELFT>::X->addAbsolute(S, STV_DEFAULT); + }; + + Define("_end", ElfSym<ELFT>::End, ElfSym<ELFT>::End2); + Define("_etext", ElfSym<ELFT>::Etext, ElfSym<ELFT>::Etext2); + Define("_edata", ElfSym<ELFT>::Edata, ElfSym<ELFT>::Edata2); +} + +// Sort input sections by section name suffixes for +// __attribute__((init_priority(N))). 
+template <class ELFT> static void sortInitFini(OutputSectionBase *S) { + if (S) + reinterpret_cast<OutputSection<ELFT> *>(S)->sortInitFini(); +} + +// Sort input sections by the special rule for .ctors and .dtors. +template <class ELFT> static void sortCtorsDtors(OutputSectionBase *S) { + if (S) + reinterpret_cast<OutputSection<ELFT> *>(S)->sortCtorsDtors(); +} + +// Sort input sections using the list provided by --symbol-ordering-file. +template <class ELFT> +static void sortBySymbolsOrder(ArrayRef<OutputSectionBase *> OutputSections) { + if (Config->SymbolOrderingFile.empty()) + return; + + // Build a map from symbols to their priorities. Symbols that didn't + // appear in the symbol ordering file have the lowest priority 0. + // All explicitly mentioned symbols have negative (higher) priorities. + DenseMap<StringRef, int> SymbolOrder; + int Priority = -Config->SymbolOrderingFile.size(); + for (StringRef S : Config->SymbolOrderingFile) + SymbolOrder.insert({S, Priority++}); + + // Build a map from sections to their priorities. + DenseMap<InputSectionBase<ELFT> *, int> SectionOrder; + for (elf::ObjectFile<ELFT> *File : Symtab<ELFT>::X->getObjectFiles()) { + for (SymbolBody *Body : File->getSymbols()) { + auto *D = dyn_cast<DefinedRegular<ELFT>>(Body); + if (!D || !D->Section) + continue; + int &Priority = SectionOrder[D->Section]; + Priority = std::min(Priority, SymbolOrder.lookup(D->getName())); + } + } + + // Sort sections by priority. + for (OutputSectionBase *Base : OutputSections) + if (auto *Sec = dyn_cast<OutputSection<ELFT>>(Base)) + Sec->sort([&](InputSection<ELFT> *S) { return SectionOrder.lookup(S); }); +} + +template <class ELFT> +void Writer<ELFT>::forEachRelSec( + std::function<void(InputSectionBase<ELFT> &)> Fn) { + for (InputSectionBase<ELFT> *IS : Symtab<ELFT>::X->Sections) { + if (!IS->Live) + continue; + // Scan all relocations. 
Each relocation goes through a series + // of tests to determine if it needs special treatment, such as + // creating GOT, PLT, copy relocations, etc. + // Note that relocations for non-alloc sections are directly + // processed by InputSection::relocateNonAlloc. + if (!(IS->Flags & SHF_ALLOC)) + continue; + if (isa<InputSection<ELFT>>(IS) || isa<EhInputSection<ELFT>>(IS)) + Fn(*IS); + } +} + +template <class ELFT> +void Writer<ELFT>::addInputSec(InputSectionBase<ELFT> *IS) { + if (!IS) + return; + + if (!IS->Live) { + reportDiscarded(IS); + return; + } + OutputSectionBase *Sec; + bool IsNew; + StringRef OutsecName = getOutputSectionName(IS->Name); + std::tie(Sec, IsNew) = Factory.create(IS, OutsecName); + if (IsNew) + OutputSections.push_back(Sec); + Sec->addSection(IS); +} + +template <class ELFT> void Writer<ELFT>::createSections() { + for (InputSectionBase<ELFT> *IS : Symtab<ELFT>::X->Sections) + addInputSec(IS); + + sortBySymbolsOrder<ELFT>(OutputSections); + sortInitFini<ELFT>(findSection(".init_array")); + sortInitFini<ELFT>(findSection(".fini_array")); + sortCtorsDtors<ELFT>(findSection(".ctors")); + sortCtorsDtors<ELFT>(findSection(".dtors")); + + for (OutputSectionBase *Sec : OutputSections) + Sec->assignOffsets(); +} + +template <class ELFT> +static bool canSharePtLoad(const OutputSectionBase &S1, + const OutputSectionBase &S2) { + if (!(S1.Flags & SHF_ALLOC) || !(S2.Flags & SHF_ALLOC)) + return false; + + bool S1IsWrite = S1.Flags & SHF_WRITE; + bool S2IsWrite = S2.Flags & SHF_WRITE; + if (S1IsWrite != S2IsWrite) + return false; + + if (!S1IsWrite) + return true; // RO and RX share a PT_LOAD with linker scripts. + return (S1.Flags & SHF_EXECINSTR) == (S2.Flags & SHF_EXECINSTR); +} + +template <class ELFT> void Writer<ELFT>::sortSections() { + // Don't sort if using -r. It is not necessary and we want to preserve the + // relative order for SHF_LINK_ORDER sections. 
+ if (Config->Relocatable) + return; + if (!ScriptConfig->HasSections) { + std::stable_sort(OutputSections.begin(), OutputSections.end(), + compareSectionsNonScript<ELFT>); + return; + } + Script<ELFT>::X->adjustSectionsBeforeSorting(); + + // The order of the sections in the script is arbitrary and may not agree with + // compareSectionsNonScript. This means that we cannot easily define a + // strict weak ordering. To see why, consider a comparison of a section in the + // script and one not in the script. We have a two simple options: + // * Make them equivalent (a is not less than b, and b is not less than a). + // The problem is then that equivalence has to be transitive and we can + // have sections a, b and c with only b in a script and a less than c + // which breaks this property. + // * Use compareSectionsNonScript. Given that the script order doesn't have + // to match, we can end up with sections a, b, c, d where b and c are in the + // script and c is compareSectionsNonScript less than b. In which case d + // can be equivalent to c, a to b and d < a. As a concrete example: + // .a (rx) # not in script + // .b (rx) # in script + // .c (ro) # in script + // .d (ro) # not in script + // + // The way we define an order then is: + // * First put script sections at the start and sort the script and + // non-script sections independently. + // * Move each non-script section to its preferred position. We try + // to put each section in the last position where it it can share + // a PT_LOAD. 
+ + std::stable_sort(OutputSections.begin(), OutputSections.end(), + compareSections<ELFT>); + + auto I = OutputSections.begin(); + auto E = OutputSections.end(); + auto NonScriptI = + std::find_if(OutputSections.begin(), E, [](OutputSectionBase *S) { + return Script<ELFT>::X->getSectionIndex(S->getName()) == INT_MAX; + }); + while (NonScriptI != E) { + auto BestPos = std::max_element( + I, NonScriptI, [&](OutputSectionBase *&A, OutputSectionBase *&B) { + bool ACanSharePtLoad = canSharePtLoad<ELFT>(**NonScriptI, *A); + bool BCanSharePtLoad = canSharePtLoad<ELFT>(**NonScriptI, *B); + if (ACanSharePtLoad != BCanSharePtLoad) + return BCanSharePtLoad; + + bool ACmp = compareSectionsNonScript<ELFT>(*NonScriptI, A); + bool BCmp = compareSectionsNonScript<ELFT>(*NonScriptI, B); + if (ACmp != BCmp) + return BCmp; // FIXME: missing test + + size_t PosA = &A - &OutputSections[0]; + size_t PosB = &B - &OutputSections[0]; + return ACmp ? PosA > PosB : PosA < PosB; + }); + + // max_element only returns NonScriptI if the range is empty. If the range + // is not empty we should consider moving the the element forward one + // position. + if (BestPos != NonScriptI && + !compareSectionsNonScript<ELFT>(*NonScriptI, *BestPos)) + ++BestPos; + std::rotate(BestPos, NonScriptI, NonScriptI + 1); + ++NonScriptI; + } + + Script<ELFT>::X->adjustSectionsAfterSorting(); +} + +template <class ELFT> +static void +finalizeSynthetic(const std::vector<SyntheticSection<ELFT> *> &Sections) { + for (SyntheticSection<ELFT> *SS : Sections) + if (SS && SS->OutSec && !SS->empty()) { + SS->finalize(); + SS->OutSec->Size = 0; + SS->OutSec->assignOffsets(); + } +} + +// We need to add input synthetic sections early in createSyntheticSections() +// to make them visible from linkescript side. But not all sections are always +// required to be in output. For example we don't need dynamic section content +// sometimes. This function filters out such unused sections from output. 
+template <class ELFT> +static void removeUnusedSyntheticSections(std::vector<OutputSectionBase *> &V) { + // Input synthetic sections are placed after all regular ones. We iterate over + // them all and exit at first non-synthetic. + for (InputSectionBase<ELFT> *S : llvm::reverse(Symtab<ELFT>::X->Sections)) { + SyntheticSection<ELFT> *SS = dyn_cast<SyntheticSection<ELFT>>(S); + if (!SS) + return; + if (!SS->empty() || !SS->OutSec) + continue; + + OutputSection<ELFT> *OutSec = cast<OutputSection<ELFT>>(SS->OutSec); + OutSec->Sections.erase( + std::find(OutSec->Sections.begin(), OutSec->Sections.end(), SS)); + // If there is no other sections in output section, remove it from output. + if (OutSec->Sections.empty()) + V.erase(std::find(V.begin(), V.end(), OutSec)); + } +} + +// Create output section objects and add them to OutputSections. +template <class ELFT> void Writer<ELFT>::finalizeSections() { + Out<ELFT>::DebugInfo = findSection(".debug_info"); + Out<ELFT>::PreinitArray = findSection(".preinit_array"); + Out<ELFT>::InitArray = findSection(".init_array"); + Out<ELFT>::FiniArray = findSection(".fini_array"); + + // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop + // symbols for sections, so that the runtime can get the start and end + // addresses of each section by section name. Add such symbols. + if (!Config->Relocatable) { + addStartEndSymbols(); + for (OutputSectionBase *Sec : OutputSections) + addStartStopSymbols(Sec); + } + + // Add _DYNAMIC symbol. Unlike GNU gold, our _DYNAMIC symbol has no type. + // It should be okay as no one seems to care about the type. + // Even the author of gold doesn't remember why gold behaves that way. + // https://sourceware.org/ml/binutils/2002-03/msg00360.html + if (In<ELFT>::DynSymTab) + addRegular("_DYNAMIC", In<ELFT>::Dynamic, 0); + + // Define __rel[a]_iplt_{start,end} symbols if needed. 
+ addRelIpltSymbols(); + + if (!Out<ELFT>::EhFrame->empty()) { + OutputSections.push_back(Out<ELFT>::EhFrame); + Out<ELFT>::EhFrame->finalize(); + } + + // Scan relocations. This must be done after every symbol is declared so that + // we can correctly decide if a dynamic relocation is needed. + forEachRelSec(scanRelocations<ELFT>); + + // Now that we have defined all possible symbols including linker- + // synthesized ones. Visit all symbols to give the finishing touches. + for (Symbol *S : Symtab<ELFT>::X->getSymbols()) { + SymbolBody *Body = S->body(); + + if (!includeInSymtab<ELFT>(*Body)) + continue; + if (In<ELFT>::SymTab) + In<ELFT>::SymTab->addSymbol(Body); + + if (In<ELFT>::DynSymTab && S->includeInDynsym()) { + In<ELFT>::DynSymTab->addSymbol(Body); + if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(Body)) + if (SS->file()->isNeeded()) + In<ELFT>::VerNeed->addSymbol(SS); + } + } + + // Do not proceed if there was an undefined symbol. + if (ErrorCount) + return; + + // So far we have added sections from input object files. + // This function adds linker-created Out<ELFT>::* sections. + addPredefinedSections(); + removeUnusedSyntheticSections<ELFT>(OutputSections); + + sortSections(); + + unsigned I = 1; + for (OutputSectionBase *Sec : OutputSections) { + Sec->SectionIndex = I++; + Sec->ShName = In<ELFT>::ShStrTab->addString(Sec->getName()); + } + + // Binary and relocatable output does not have PHDRS. + // The headers have to be created before finalize as that can influence the + // image base and the dynamic section on mips includes the image base. + if (!Config->Relocatable && !Config->OFormatBinary) { + Phdrs = Script<ELFT>::X->hasPhdrsCommands() ? Script<ELFT>::X->createPhdrs() + : createPhdrs(); + addPtArmExid(Phdrs); + fixHeaders(); + } + + // Fill other section headers. The dynamic table is finalized + // at the end because some tags like RELSZ depend on result + // of finalizing other sections. 
+ for (OutputSectionBase *Sec : OutputSections) + Sec->finalize(); + + // Dynamic section must be the last one in this list and dynamic + // symbol table section (DynSymTab) must be the first one. + finalizeSynthetic<ELFT>( + {In<ELFT>::DynSymTab, In<ELFT>::GnuHashTab, In<ELFT>::HashTab, + In<ELFT>::SymTab, In<ELFT>::ShStrTab, In<ELFT>::StrTab, + In<ELFT>::VerDef, In<ELFT>::DynStrTab, In<ELFT>::GdbIndex, + In<ELFT>::Got, In<ELFT>::MipsGot, In<ELFT>::IgotPlt, + In<ELFT>::GotPlt, In<ELFT>::RelaDyn, In<ELFT>::RelaIplt, + In<ELFT>::RelaPlt, In<ELFT>::Plt, In<ELFT>::Iplt, + In<ELFT>::Plt, In<ELFT>::EhFrameHdr, In<ELFT>::VerSym, + In<ELFT>::VerNeed, In<ELFT>::Dynamic}); +} + +template <class ELFT> void Writer<ELFT>::addPredefinedSections() { + if (Out<ELFT>::Bss->Size > 0) + OutputSections.push_back(Out<ELFT>::Bss); + + auto OS = dyn_cast_or_null<OutputSection<ELFT>>(findSection(".ARM.exidx")); + if (OS && !OS->Sections.empty() && !Config->Relocatable) + OS->addSection(make<ARMExidxSentinelSection<ELFT>>()); + + addInputSec(In<ELFT>::SymTab); + addInputSec(In<ELFT>::ShStrTab); + addInputSec(In<ELFT>::StrTab); +} + +// The linker is expected to define SECNAME_start and SECNAME_end +// symbols for a few sections. This function defines them. +template <class ELFT> void Writer<ELFT>::addStartEndSymbols() { + auto Define = [&](StringRef Start, StringRef End, OutputSectionBase *OS) { + // These symbols resolve to the image base if the section does not exist. + // A special value -1 indicates end of the section. + addOptionalSynthetic<ELFT>(Start, OS, 0); + addOptionalSynthetic<ELFT>(End, OS, OS ? 
-1 : 0); + }; + + Define("__preinit_array_start", "__preinit_array_end", + Out<ELFT>::PreinitArray); + Define("__init_array_start", "__init_array_end", Out<ELFT>::InitArray); + Define("__fini_array_start", "__fini_array_end", Out<ELFT>::FiniArray); + + if (OutputSectionBase *Sec = findSection(".ARM.exidx")) + Define("__exidx_start", "__exidx_end", Sec); +} + +// If a section name is valid as a C identifier (which is rare because of +// the leading '.'), linkers are expected to define __start_<secname> and +// __stop_<secname> symbols. They are at beginning and end of the section, +// respectively. This is not requested by the ELF standard, but GNU ld and +// gold provide the feature, and used by many programs. +template <class ELFT> +void Writer<ELFT>::addStartStopSymbols(OutputSectionBase *Sec) { + StringRef S = Sec->getName(); + if (!isValidCIdentifier(S)) + return; + addOptionalSynthetic<ELFT>(Saver.save("__start_" + S), Sec, 0, STV_DEFAULT); + addOptionalSynthetic<ELFT>(Saver.save("__stop_" + S), Sec, -1, STV_DEFAULT); +} + +template <class ELFT> +OutputSectionBase *Writer<ELFT>::findSection(StringRef Name) { + for (OutputSectionBase *Sec : OutputSections) + if (Sec->getName() == Name) + return Sec; + return nullptr; +} + +template <class ELFT> static bool needsPtLoad(OutputSectionBase *Sec) { + if (!(Sec->Flags & SHF_ALLOC)) + return false; + + // Don't allocate VA space for TLS NOBITS sections. The PT_TLS PHDR is + // responsible for allocating space for them, not the PT_LOAD that + // contains the TLS initialization image. + if (Sec->Flags & SHF_TLS && Sec->Type == SHT_NOBITS) + return false; + return true; +} + +// Linker scripts are responsible for aligning addresses. Unfortunately, most +// linker scripts are designed for creating two PT_LOADs only, one RX and one +// RW. This means that there is no alignment in the RO to RX transition and we +// cannot create a PT_LOAD there. 
// Maps section-derived segment flags F to the flags actually used for the
// PT_LOAD: everything becomes RWX when Config->OMagic is set, and read-only
// segments gain PF_X when Config->SingleRoRx is set.
template <class ELFT>
static typename ELFT::uint computeFlags(typename ELFT::uint F) {
  if (Config->OMagic)
    return PF_R | PF_W | PF_X;
  if (Config->SingleRoRx && !(F & PF_W))
    return F | PF_X;
  return F;
}

// Decide which program headers to create and which sections to include in each
// one.
template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() {
  std::vector<PhdrEntry> Ret;
  // Appends a new header to Ret and returns a pointer to it. The pointer
  // refers to an element of Ret, so it must not be used after a later call
  // that may grow the vector.
  auto AddHdr = [&](unsigned Type, unsigned Flags) -> PhdrEntry * {
    Ret.emplace_back(Type, Flags);
    return &Ret.back();
  };

  // The first phdr entry is PT_PHDR which describes the program header itself.
  PhdrEntry &Hdr = *AddHdr(PT_PHDR, PF_R);
  Hdr.add(Out<ELFT>::ProgramHeaders);

  // PT_INTERP must be the second entry if exists.
  if (OutputSectionBase *Sec = findSection(".interp")) {
    PhdrEntry &Hdr = *AddHdr(PT_INTERP, Sec->getPhdrFlags());
    Hdr.add(Sec);
  }

  // Add the first PT_LOAD segment for regular output sections.
  uintX_t Flags = computeFlags<ELFT>(PF_R);
  PhdrEntry *Load = AddHdr(PT_LOAD, Flags);

  // These headers are collected on the side and appended to Ret only after
  // the loop, so Load stays valid while sections are being assigned.
  PhdrEntry TlsHdr(PT_TLS, PF_R);
  PhdrEntry RelRo(PT_GNU_RELRO, PF_R);
  PhdrEntry Note(PT_NOTE, PF_R);
  for (OutputSectionBase *Sec : OutputSections) {
    // Stop at the first non-allocated section.
    if (!(Sec->Flags & SHF_ALLOC))
      break;

    // If we meet a TLS section then we create the TLS header
    // and put all TLS sections inside for further use when
    // assigning addresses.
    if (Sec->Flags & SHF_TLS)
      TlsHdr.add(Sec);

    if (!needsPtLoad<ELFT>(Sec))
      continue;

    // Segments are contiguous memory regions that have the same attributes
    // (e.g. executable or writable). There is one phdr for each segment.
    // Therefore, we need to create a new phdr when the next section has
    // different flags or is loaded at a discontiguous address using AT linker
    // script command.
    uintX_t NewFlags = computeFlags<ELFT>(Sec->getPhdrFlags());
    if (Script<ELFT>::X->hasLMA(Sec->getName()) || Flags != NewFlags) {
      Load = AddHdr(PT_LOAD, NewFlags);
      Flags = NewFlags;
    }

    Load->add(Sec);

    if (isRelroSection<ELFT>(Sec))
      RelRo.add(Sec);
    if (Sec->Type == SHT_NOTE)
      Note.add(Sec);
  }

  // Add the TLS segment unless it's empty.
  if (TlsHdr.First)
    Ret.push_back(std::move(TlsHdr));

  // Add an entry for .dynamic.
  if (In<ELFT>::DynSymTab) {
    PhdrEntry &H =
        *AddHdr(PT_DYNAMIC, In<ELFT>::Dynamic->OutSec->getPhdrFlags());
    H.add(In<ELFT>::Dynamic->OutSec);
  }

  // PT_GNU_RELRO includes all sections that should be marked as
  // read-only by dynamic linker after processing relocations.
  if (RelRo.First)
    Ret.push_back(std::move(RelRo));

  // PT_GNU_EH_FRAME is a special segment pointing to .eh_frame_hdr.
  if (!Out<ELFT>::EhFrame->empty() && In<ELFT>::EhFrameHdr) {
    PhdrEntry &Hdr =
        *AddHdr(PT_GNU_EH_FRAME, In<ELFT>::EhFrameHdr->OutSec->getPhdrFlags());
    Hdr.add(In<ELFT>::EhFrameHdr->OutSec);
  }

  // PT_OPENBSD_RANDOMIZE specifies the location and size of a part of the
  // memory image of the program that must be filled with random data before any
  // code in the object is executed.
  if (OutputSectionBase *Sec = findSection(".openbsd.randomdata")) {
    PhdrEntry &Hdr = *AddHdr(PT_OPENBSD_RANDOMIZE, Sec->getPhdrFlags());
    Hdr.add(Sec);
  }

  // PT_GNU_STACK is a special segment to tell the loader to make the
  // pages for the stack non-executable.
  if (!Config->ZExecstack) {
    PhdrEntry &Hdr = *AddHdr(PT_GNU_STACK, PF_R | PF_W);
    // uint64_t(-1) is compared against as the "no size given" value.
    if (Config->ZStackSize != uint64_t(-1))
      Hdr.p_memsz = Config->ZStackSize;
  }

  // PT_OPENBSD_WXNEEDED is an OpenBSD-specific header to mark that the
  // executable is expected to perform W^X violations, such as calling
  // mprotect(2) or mmap(2) with PROT_WRITE | PROT_EXEC, which is prohibited by
  // default on OpenBSD.
  if (Config->ZWxneeded)
    AddHdr(PT_OPENBSD_WXNEEDED, PF_X);

  if (Note.First)
    Ret.push_back(std::move(Note));
  return Ret;
}

// On ARM, appends a PT_ARM_EXIDX header covering the first output section of
// type SHT_ARM_EXIDX, if there is one. Does nothing on other machines.
template <class ELFT>
void Writer<ELFT>::addPtArmExid(std::vector<PhdrEntry> &Phdrs) {
  if (Config->EMachine != EM_ARM)
    return;
  auto I = std::find_if(
      OutputSections.begin(), OutputSections.end(),
      [](OutputSectionBase *Sec) { return Sec->Type == SHT_ARM_EXIDX; });
  if (I == OutputSections.end())
    return;

  // PT_ARM_EXIDX is the ARM EHABI equivalent of PT_GNU_EH_FRAME
  PhdrEntry ARMExidx(PT_ARM_EXIDX, PF_R);
  ARMExidx.add(*I);
  Phdrs.push_back(ARMExidx);
}

// The first section of each PT_LOAD and the first section after PT_GNU_RELRO
// have to be page aligned so that the dynamic linker can set the permissions.
template <class ELFT> void Writer<ELFT>::fixSectionAlignments() {
  for (const PhdrEntry &P : Phdrs)
    if (P.p_type == PT_LOAD && P.First)
      P.First->PageAlign = true;

  for (const PhdrEntry &P : Phdrs) {
    if (P.p_type != PT_GNU_RELRO)
      continue;
    // Find the first section after PT_GNU_RELRO. If it is in a PT_LOAD we
    // have to align it to a page.
    auto End = OutputSections.end();
    auto I = std::find(OutputSections.begin(), End, P.Last);
    // Nothing to do if the RELRO segment ends with the last output section.
    if (I == End || (I + 1) == End)
      continue;
    OutputSectionBase *Sec = *(I + 1);
    if (needsPtLoad<ELFT>(Sec))
      Sec->PageAlign = true;
  }
}

// Makes the first PT_LOAD in Phdrs start at the ELF header: sections that
// pointed at the old first section through FirstInPtLoad are redirected to
// Out<ELFT>::ElfHeader, and an otherwise empty segment ends at the program
// headers. Does nothing if there is no PT_LOAD.
template <class ELFT>
void elf::allocateHeaders(MutableArrayRef<PhdrEntry> Phdrs,
                          ArrayRef<OutputSectionBase *> OutputSections) {
  auto FirstPTLoad =
      std::find_if(Phdrs.begin(), Phdrs.end(),
                   [](const PhdrEntry &E) { return E.p_type == PT_LOAD; });
  if (FirstPTLoad == Phdrs.end())
    return;
  if (FirstPTLoad->First)
    for (OutputSectionBase *Sec : OutputSections)
      if (Sec->FirstInPtLoad == FirstPTLoad->First)
        Sec->FirstInPtLoad = Out<ELFT>::ElfHeader;
  FirstPTLoad->First = Out<ELFT>::ElfHeader;
  if (!FirstPTLoad->Last)
    FirstPTLoad->Last = Out<ELFT>::ProgramHeaders;
}

// We should set file offsets and VAs for elf header and program headers
// sections. These are special, we do not include them into output sections
// list, but have them to simplify the code.
template <class ELFT> void Writer<ELFT>::fixHeaders() {
  Out<ELFT>::ProgramHeaders->Size = sizeof(Elf_Phdr) * Phdrs.size();
  // If the script has SECTIONS, assignAddresses will compute the values.
  if (ScriptConfig->HasSections)
    return;

  uintX_t HeaderSize = getHeaderSize<ELFT>();
  // When -T<section> option is specified, lower the base to make room for those
  // sections.
  if (!Config->SectionStartMap.empty()) {
    // Smallest requested section start address.
    uint64_t Min = -1;
    for (const auto &P : Config->SectionStartMap)
      Min = std::min(Min, P.second);
    // Leave room for the headers below the lowest section if they fit;
    // otherwise the headers are not allocated at all.
    if (HeaderSize < Min)
      Min -= HeaderSize;
    else
      AllocateHeader = false;
    if (Min < Config->ImageBase)
      Config->ImageBase = alignDown(Min, Config->MaxPageSize);
  }

  if (AllocateHeader)
    allocateHeaders<ELFT>(Phdrs, OutputSections);

  uintX_t BaseVA = Config->ImageBase;
  Out<ELFT>::ElfHeader->Addr = BaseVA;
  Out<ELFT>::ProgramHeaders->Addr = BaseVA + Out<ELFT>::ElfHeader->Size;
}

// Assign VAs (addresses at run-time) to output sections.
template <class ELFT> void Writer<ELFT>::assignAddresses() {
  uintX_t VA = Config->ImageBase;
  if (AllocateHeader)
    VA += getHeaderSize<ELFT>();
  // Running offset, relative to VA, for TLS NOBITS sections, which do not
  // advance VA themselves.
  uintX_t ThreadBssOffset = 0;
  for (OutputSectionBase *Sec : OutputSections) {
    uintX_t Alignment = Sec->Addralign;
    if (Sec->PageAlign)
      Alignment = std::max<uintX_t>(Alignment, Config->MaxPageSize);

    // An explicit start address from SectionStartMap overrides the running VA.
    auto I = Config->SectionStartMap.find(Sec->getName());
    if (I != Config->SectionStartMap.end())
      VA = I->second;

    // We only assign VAs to allocated sections.
    if (needsPtLoad<ELFT>(Sec)) {
      VA = alignTo(VA, Alignment);
      Sec->Addr = VA;
      VA += Sec->Size;
    } else if (Sec->Flags & SHF_TLS && Sec->Type == SHT_NOBITS) {
      uintX_t TVA = VA + ThreadBssOffset;
      TVA = alignTo(TVA, Alignment);
      Sec->Addr = TVA;
      ThreadBssOffset = TVA - VA + Sec->Size;
    }
  }
}

// Adjusts the file alignment for a given output section and returns
// its new file offset. The file offset must be the same with its
// virtual address (modulo the page size) so that the loader can load
// executables without any address adjustment.
template <class ELFT, class uintX_t>
static uintX_t getFileAlignment(uintX_t Off, OutputSectionBase *Sec) {
  OutputSectionBase *First = Sec->FirstInPtLoad;
  // If the section is not in a PT_LOAD, we just have to align it.
  if (!First)
    return alignTo(Off, Sec->Addralign);

  // The first section in a PT_LOAD has to have congruent offset and address
  // modulo the page size.
  if (Sec == First)
    return alignTo(Off, Config->MaxPageSize, Sec->Addr);

  // If two sections share the same PT_LOAD the file offset is calculated
  // using this formula: Off2 = Off1 + (VA2 - VA1).
  return First->Offset + Sec->Addr - First->Addr;
}

// Assigns Sec its file offset and advances Off past it. SHT_NOBITS sections
// get the current offset and do not advance it.
template <class ELFT, class uintX_t>
void setOffset(OutputSectionBase *Sec, uintX_t &Off) {
  if (Sec->Type == SHT_NOBITS) {
    Sec->Offset = Off;
    return;
  }

  Off = getFileAlignment<ELFT>(Off, Sec);
  Sec->Offset = Off;
  Off += Sec->Size;
}

// Assigns file offsets for binary output: only allocated sections are laid
// out, starting at offset 0, with no ELF or program headers in front.
template <class ELFT> void Writer<ELFT>::assignFileOffsetsBinary() {
  uintX_t Off = 0;
  for (OutputSectionBase *Sec : OutputSections)
    if (Sec->Flags & SHF_ALLOC)
      setOffset<ELFT>(Sec, Off);
  FileSize = alignTo(Off, sizeof(uintX_t));
}

// Assign file offsets to output sections.
template <class ELFT> void Writer<ELFT>::assignFileOffsets() {
  uintX_t Off = 0;
  setOffset<ELFT>(Out<ELFT>::ElfHeader, Off);
  setOffset<ELFT>(Out<ELFT>::ProgramHeaders, Off);

  for (OutputSectionBase *Sec : OutputSections)
    setOffset<ELFT>(Sec, Off);

  // The section header table follows the last section; it has one entry per
  // output section plus one extra entry.
  SectionHeaderOff = alignTo(Off, sizeof(uintX_t));
  FileSize = SectionHeaderOff + (OutputSections.size() + 1) * sizeof(Elf_Shdr);
}

// Finalize the program headers. We call this function after we assign
// file offsets and VAs to all sections.
+template <class ELFT> void Writer<ELFT>::setPhdrs() { + for (PhdrEntry &P : Phdrs) { + OutputSectionBase *First = P.First; + OutputSectionBase *Last = P.Last; + if (First) { + P.p_filesz = Last->Offset - First->Offset; + if (Last->Type != SHT_NOBITS) + P.p_filesz += Last->Size; + P.p_memsz = Last->Addr + Last->Size - First->Addr; + P.p_offset = First->Offset; + P.p_vaddr = First->Addr; + if (!P.HasLMA) + P.p_paddr = First->getLMA(); + } + if (P.p_type == PT_LOAD) + P.p_align = Config->MaxPageSize; + else if (P.p_type == PT_GNU_RELRO) { + P.p_align = 1; + // The glibc dynamic loader rounds the size down, so we need to round up + // to protect the last page. This is a no-op on FreeBSD which always + // rounds up. + P.p_memsz = alignTo(P.p_memsz, Config->MaxPageSize); + } + + // The TLS pointer goes after PT_TLS. At least glibc will align it, + // so round up the size to make sure the offsets are correct. + if (P.p_type == PT_TLS) { + Out<ELFT>::TlsPhdr = &P; + if (P.p_memsz) + P.p_memsz = alignTo(P.p_memsz, P.p_align); + } + } +} + +// The entry point address is chosen in the following ways. +// +// 1. the '-e' entry command-line option; +// 2. the ENTRY(symbol) command in a linker control script; +// 3. the value of the symbol start, if present; +// 4. the address of the first byte of the .text section, if present; +// 5. the address 0. +template <class ELFT> typename ELFT::uint Writer<ELFT>::getEntryAddr() { + // Case 1, 2 or 3. As a special case, if the symbol is actually + // a number, we'll use that number as an address. 
+ if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Entry)) + return B->getVA<ELFT>(); + uint64_t Addr; + if (!Config->Entry.getAsInteger(0, Addr)) + return Addr; + + // Case 4 + if (OutputSectionBase *Sec = findSection(".text")) { + if (Config->WarnMissingEntry) + warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" + + utohexstr(Sec->Addr)); + return Sec->Addr; + } + + // Case 5 + if (Config->WarnMissingEntry) + warn("cannot find entry symbol " + Config->Entry + + "; not setting start address"); + return 0; +} + +template <class ELFT> static uint8_t getELFEncoding() { + if (ELFT::TargetEndianness == llvm::support::little) + return ELFDATA2LSB; + return ELFDATA2MSB; +} + +static uint16_t getELFType() { + if (Config->Pic) + return ET_DYN; + if (Config->Relocatable) + return ET_REL; + return ET_EXEC; +} + +// This function is called after we have assigned address and size +// to each section. This function fixes some predefined absolute +// symbol values that depend on section address and size. +template <class ELFT> void Writer<ELFT>::fixAbsoluteSymbols() { + // __ehdr_start is the location of program headers. + if (ElfSym<ELFT>::EhdrStart) + ElfSym<ELFT>::EhdrStart->Value = Out<ELFT>::ProgramHeaders->Addr; + + auto Set = [](DefinedRegular<ELFT> *S1, DefinedRegular<ELFT> *S2, uintX_t V) { + if (S1) + S1->Value = V; + if (S2) + S2->Value = V; + }; + + // _etext is the first location after the last read-only loadable segment. + // _edata is the first location after the last read-write loadable segment. + // _end is the first location after the uninitialized data region. 
+ for (PhdrEntry &P : Phdrs) { + if (P.p_type != PT_LOAD) + continue; + Set(ElfSym<ELFT>::End, ElfSym<ELFT>::End2, P.p_vaddr + P.p_memsz); + + uintX_t Val = P.p_vaddr + P.p_filesz; + if (P.p_flags & PF_W) + Set(ElfSym<ELFT>::Edata, ElfSym<ELFT>::Edata2, Val); + else + Set(ElfSym<ELFT>::Etext, ElfSym<ELFT>::Etext2, Val); + } + + // Setup MIPS _gp_disp/__gnu_local_gp symbols which should + // be equal to the _gp symbol's value. + if (Config->EMachine == EM_MIPS) { + if (!ElfSym<ELFT>::MipsGp->Value) { + // Find GP-relative section with the lowest address + // and use this address to calculate default _gp value. + uintX_t Gp = -1; + for (const OutputSectionBase * OS : OutputSections) + if ((OS->Flags & SHF_MIPS_GPREL) && OS->Addr < Gp) + Gp = OS->Addr; + if (Gp != (uintX_t)-1) + ElfSym<ELFT>::MipsGp->Value = Gp + 0x7ff0; + } + if (ElfSym<ELFT>::MipsGpDisp) + ElfSym<ELFT>::MipsGpDisp->Value = ElfSym<ELFT>::MipsGp->Value; + if (ElfSym<ELFT>::MipsLocalGp) + ElfSym<ELFT>::MipsLocalGp->Value = ElfSym<ELFT>::MipsGp->Value; + } +} + +template <class ELFT> void Writer<ELFT>::writeHeader() { + uint8_t *Buf = Buffer->getBufferStart(); + memcpy(Buf, "\177ELF", 4); + + // Write the ELF header. + auto *EHdr = reinterpret_cast<Elf_Ehdr *>(Buf); + EHdr->e_ident[EI_CLASS] = ELFT::Is64Bits ? 
ELFCLASS64 : ELFCLASS32; + EHdr->e_ident[EI_DATA] = getELFEncoding<ELFT>(); + EHdr->e_ident[EI_VERSION] = EV_CURRENT; + EHdr->e_ident[EI_OSABI] = Config->OSABI; + EHdr->e_type = getELFType(); + EHdr->e_machine = Config->EMachine; + EHdr->e_version = EV_CURRENT; + EHdr->e_entry = getEntryAddr(); + EHdr->e_shoff = SectionHeaderOff; + EHdr->e_ehsize = sizeof(Elf_Ehdr); + EHdr->e_phnum = Phdrs.size(); + EHdr->e_shentsize = sizeof(Elf_Shdr); + EHdr->e_shnum = OutputSections.size() + 1; + EHdr->e_shstrndx = In<ELFT>::ShStrTab->OutSec->SectionIndex; + + if (Config->EMachine == EM_ARM) + // We don't currently use any features incompatible with EF_ARM_EABI_VER5, + // but we don't have any firm guarantees of conformance. Linux AArch64 + // kernels (as of 2016) require an EABI version to be set. + EHdr->e_flags = EF_ARM_EABI_VER5; + else if (Config->EMachine == EM_MIPS) + EHdr->e_flags = getMipsEFlags<ELFT>(); + + if (!Config->Relocatable) { + EHdr->e_phoff = sizeof(Elf_Ehdr); + EHdr->e_phentsize = sizeof(Elf_Phdr); + } + + // Write the program header table. + auto *HBuf = reinterpret_cast<Elf_Phdr *>(Buf + EHdr->e_phoff); + for (PhdrEntry &P : Phdrs) { + HBuf->p_type = P.p_type; + HBuf->p_flags = P.p_flags; + HBuf->p_offset = P.p_offset; + HBuf->p_vaddr = P.p_vaddr; + HBuf->p_paddr = P.p_paddr; + HBuf->p_filesz = P.p_filesz; + HBuf->p_memsz = P.p_memsz; + HBuf->p_align = P.p_align; + ++HBuf; + } + + // Write the section header table. Note that the first table entry is null. + auto *SHdrs = reinterpret_cast<Elf_Shdr *>(Buf + EHdr->e_shoff); + for (OutputSectionBase *Sec : OutputSections) + Sec->writeHeaderTo<ELFT>(++SHdrs); +} + +// Removes a given file asynchronously. This is a performance hack, +// so remove this when operating systems are improved. +// +// On Linux (and probably on other Unix-like systems), unlink(2) is a +// noticeably slow system call. As of 2016, unlink takes 250 +// milliseconds to remove a 1 GB file on ext4 filesystem on my machine. 
+// +// To create a new result file, we first remove existing file. So, if +// you repeatedly link a 1 GB program in a regular compile-link-debug +// cycle, every cycle wastes 250 milliseconds only to remove a file. +// Since LLD can link a 1 GB binary in about 5 seconds, that waste +// actually counts. +// +// This function spawns a background thread to call unlink. +// The calling thread returns almost immediately. +static void unlinkAsync(StringRef Path) { + if (!Config->Threads || !sys::fs::exists(Config->OutputFile)) + return; + + // First, rename Path to avoid race condition. We cannot remove + // Path from a different thread because we are now going to create + // Path as a new file. If we do that in a different thread, the new + // thread can remove the new file. + SmallString<128> TempPath; + if (auto EC = sys::fs::createUniqueFile(Path + "tmp%%%%%%%%", TempPath)) + fatal(EC, "createUniqueFile failed"); + if (auto EC = sys::fs::rename(Path, TempPath)) + fatal(EC, "rename failed"); + + // Remove TempPath in background. + std::thread([=] { ::remove(TempPath.str().str().c_str()); }).detach(); +} + +// Open a result file. +template <class ELFT> void Writer<ELFT>::openFile() { + unlinkAsync(Config->OutputFile); + ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Config->OutputFile, FileSize, + FileOutputBuffer::F_executable); + + if (auto EC = BufferOrErr.getError()) + error(EC, "failed to open " + Config->OutputFile); + else + Buffer = std::move(*BufferOrErr); +} + +template <class ELFT> void Writer<ELFT>::writeSectionsBinary() { + uint8_t *Buf = Buffer->getBufferStart(); + for (OutputSectionBase *Sec : OutputSections) + if (Sec->Flags & SHF_ALLOC) + Sec->writeTo(Buf + Sec->Offset); +} + +// Write section contents to a mmap'ed file. 
+template <class ELFT> void Writer<ELFT>::writeSections() { + uint8_t *Buf = Buffer->getBufferStart(); + + // PPC64 needs to process relocations in the .opd section + // before processing relocations in code-containing sections. + Out<ELFT>::Opd = findSection(".opd"); + if (Out<ELFT>::Opd) { + Out<ELFT>::OpdBuf = Buf + Out<ELFT>::Opd->Offset; + Out<ELFT>::Opd->writeTo(Buf + Out<ELFT>::Opd->Offset); + } + + OutputSectionBase *EhFrameHdr = + In<ELFT>::EhFrameHdr ? In<ELFT>::EhFrameHdr->OutSec : nullptr; + for (OutputSectionBase *Sec : OutputSections) + if (Sec != Out<ELFT>::Opd && Sec != EhFrameHdr) + Sec->writeTo(Buf + Sec->Offset); + + // The .eh_frame_hdr depends on .eh_frame section contents, therefore + // it should be written after .eh_frame is written. + if (!Out<ELFT>::EhFrame->empty() && EhFrameHdr) + EhFrameHdr->writeTo(Buf + EhFrameHdr->Offset); +} + +template <class ELFT> void Writer<ELFT>::writeBuildId() { + if (!In<ELFT>::BuildId || !In<ELFT>::BuildId->OutSec) + return; + + // Compute a hash of all sections of the output file. 
+ uint8_t *Start = Buffer->getBufferStart(); + uint8_t *End = Start + FileSize; + In<ELFT>::BuildId->writeBuildId({Start, End}); +} + +template void elf::writeResult<ELF32LE>(); +template void elf::writeResult<ELF32BE>(); +template void elf::writeResult<ELF64LE>(); +template void elf::writeResult<ELF64BE>(); + +template void elf::allocateHeaders<ELF32LE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); +template void elf::allocateHeaders<ELF32BE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); +template void elf::allocateHeaders<ELF64LE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); +template void elf::allocateHeaders<ELF64BE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); + +template bool elf::isRelroSection<ELF32LE>(const OutputSectionBase *); +template bool elf::isRelroSection<ELF32BE>(const OutputSectionBase *); +template bool elf::isRelroSection<ELF64LE>(const OutputSectionBase *); +template bool elf::isRelroSection<ELF64BE>(const OutputSectionBase *); + +template void elf::reportDiscarded<ELF32LE>(InputSectionBase<ELF32LE> *); +template void elf::reportDiscarded<ELF32BE>(InputSectionBase<ELF32BE> *); +template void elf::reportDiscarded<ELF64LE>(InputSectionBase<ELF64LE> *); +template void elf::reportDiscarded<ELF64BE>(InputSectionBase<ELF64BE> *); diff --git a/contrib/llvm/tools/lld/ELF/Writer.h b/contrib/llvm/tools/lld/ELF/Writer.h new file mode 100644 index 000000000000..718e3139a809 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Writer.h @@ -0,0 +1,66 @@ +//===- Writer.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_WRITER_H +#define LLD_ELF_WRITER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include <cstdint> +#include <memory> + +namespace lld { +namespace elf { +class InputFile; +class OutputSectionBase; +template <class ELFT> class InputSectionBase; +template <class ELFT> class ObjectFile; +template <class ELFT> class SymbolTable; +template <class ELFT> void writeResult(); +template <class ELFT> void markLive(); +template <class ELFT> bool isRelroSection(const OutputSectionBase *Sec); + +// This describes a program header entry. +// Each contains type, access flags and range of output sections that will be +// placed in it. +struct PhdrEntry { + PhdrEntry(unsigned Type, unsigned Flags); + void add(OutputSectionBase *Sec); + + uint64_t p_paddr = 0; + uint64_t p_vaddr = 0; + uint64_t p_align = 0; + uint64_t p_memsz = 0; + uint64_t p_filesz = 0; + uint64_t p_offset = 0; + uint32_t p_type = 0; + uint32_t p_flags = 0; + + OutputSectionBase *First = nullptr; + OutputSectionBase *Last = nullptr; + bool HasLMA = false; +}; + +llvm::StringRef getOutputSectionName(llvm::StringRef Name); + +template <class ELFT> +void allocateHeaders(llvm::MutableArrayRef<PhdrEntry>, + llvm::ArrayRef<OutputSectionBase *>); +template <class ELFT> void reportDiscarded(InputSectionBase<ELFT> *IS); + +template <class ELFT> uint32_t getMipsEFlags(); + +uint8_t getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag, + llvm::StringRef FileName); + +bool isMipsN32Abi(const InputFile *F); +} +} + +#endif |
