diff options
Diffstat (limited to 'contrib/llvm-project/lld/MachO')
32 files changed, 3844 insertions, 692 deletions
diff --git a/contrib/llvm-project/lld/MachO/Arch/X86_64.cpp b/contrib/llvm-project/lld/MachO/Arch/X86_64.cpp index 36f686ca2f1d..729ef603adb7 100644 --- a/contrib/llvm-project/lld/MachO/Arch/X86_64.cpp +++ b/contrib/llvm-project/lld/MachO/Arch/X86_64.cpp @@ -25,24 +25,26 @@ namespace { struct X86_64 : TargetInfo { X86_64(); - uint64_t getImplicitAddend(MemoryBufferRef, const section_64 &, - const relocation_info &) const override; + bool isPairedReloc(relocation_info) const override; + uint64_t getAddend(MemoryBufferRef, const section_64 &, relocation_info, + relocation_info) const override; void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const override; - void writeStub(uint8_t *buf, const DylibSymbol &) const override; + void writeStub(uint8_t *buf, const macho::Symbol &) const override; void writeStubHelperHeader(uint8_t *buf) const override; void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, uint64_t entryAddr) const override; - void prepareSymbolRelocation(lld::macho::Symbol &, const InputSection *, + void prepareSymbolRelocation(lld::macho::Symbol *, const InputSection *, const Reloc &) override; - uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override; + uint64_t resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &, + uint8_t type) const override; }; } // namespace static std::string getErrorLocation(MemoryBufferRef mb, const section_64 &sec, - const relocation_info &rel) { + relocation_info rel) { return ("invalid relocation at offset " + std::to_string(rel.r_address) + " of " + sec.segname + "," + sec.sectname + " in " + mb.getBufferIdentifier()) @@ -50,10 +52,9 @@ static std::string getErrorLocation(MemoryBufferRef mb, const section_64 &sec, } static void validateLength(MemoryBufferRef mb, const section_64 &sec, - const relocation_info &rel, - const std::vector<uint8_t> &validLengths) { - if (std::find(validLengths.begin(), validLengths.end(), rel.r_length) != - validLengths.end()) + relocation_info rel, + 
ArrayRef<uint8_t> validLengths) { + if (find(validLengths, rel.r_length) != validLengths.end()) return; std::string msg = getErrorLocation(mb, sec, rel) + ": relocations of type " + @@ -68,10 +69,20 @@ static void validateLength(MemoryBufferRef mb, const section_64 &sec, fatal(msg); } -uint64_t X86_64::getImplicitAddend(MemoryBufferRef mb, const section_64 &sec, - const relocation_info &rel) const { +bool X86_64::isPairedReloc(relocation_info rel) const { + return rel.r_type == X86_64_RELOC_SUBTRACTOR; +} + +uint64_t X86_64::getAddend(MemoryBufferRef mb, const section_64 &sec, + relocation_info rel, + relocation_info pairedRel) const { auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); const uint8_t *loc = buf + sec.offset + rel.r_address; + + if (isThreadLocalVariables(sec.flags) && rel.r_type != X86_64_RELOC_UNSIGNED) + error("relocations in thread-local variable sections must be " + "X86_64_RELOC_UNSIGNED"); + switch (rel.r_type) { case X86_64_RELOC_BRANCH: // XXX: ld64 also supports r_length = 0 here but I'm not sure when such a @@ -84,6 +95,7 @@ uint64_t X86_64::getImplicitAddend(MemoryBufferRef mb, const section_64 &sec, case X86_64_RELOC_SIGNED_4: case X86_64_RELOC_GOT_LOAD: case X86_64_RELOC_GOT: + case X86_64_RELOC_TLV: if (!rel.r_pcrel) fatal(getErrorLocation(mb, sec, rel) + ": relocations of type " + std::to_string(rel.r_type) + " must be pcrel"); @@ -123,6 +135,7 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t val) const { case X86_64_RELOC_SIGNED_4: case X86_64_RELOC_GOT_LOAD: case X86_64_RELOC_GOT: + case X86_64_RELOC_TLV: // These types are only used for pc-relative relocations, so offset by 4 // since the RIP has advanced by 4 at this point. This is only valid when // r_length = 2, which is enforced by validateLength(). 
@@ -132,7 +145,7 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t val) const { break; default: llvm_unreachable( - "getImplicitAddend should have flagged all unhandled relocation types"); + "getAddend should have flagged all unhandled relocation types"); } switch (r.length) { @@ -174,7 +187,7 @@ static constexpr uint8_t stub[] = { 0xff, 0x25, 0, 0, 0, 0, // jmpq *__la_symbol_ptr(%rip) }; -void X86_64::writeStub(uint8_t *buf, const DylibSymbol &sym) const { +void X86_64::writeStub(uint8_t *buf, const macho::Symbol &sym) const { memcpy(buf, stub, 2); // just copy the two nonzero bytes uint64_t stubAddr = in.stubs->addr + sym.stubsIndex * sizeof(stub); writeRipRelative(buf, stubAddr, sizeof(stub), @@ -209,37 +222,62 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym, in.stubHelper->addr); } -void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, +void X86_64::prepareSymbolRelocation(lld::macho::Symbol *sym, const InputSection *isec, const Reloc &r) { switch (r.type) { - case X86_64_RELOC_GOT_LOAD: - // TODO: implement mov -> lea relaxation for non-dynamic symbols - case X86_64_RELOC_GOT: + case X86_64_RELOC_GOT_LOAD: { + if (needsBinding(sym)) + in.got->addEntry(sym); + + if (sym->isTlv()) + error("found GOT relocation referencing thread-local variable in " + + toString(isec)); + break; + } + case X86_64_RELOC_GOT: { in.got->addEntry(sym); + + if (sym->isTlv()) + error("found GOT relocation referencing thread-local variable in " + + toString(isec)); break; + } case X86_64_RELOC_BRANCH: { - if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) - in.stubs->addEntry(*dysym); + prepareBranchTarget(sym); break; } case X86_64_RELOC_UNSIGNED: { - if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) { + if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { if (r.length != 3) { error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " + dysym->getName() + " must have r_length = 3"); return; } - in.binding->addEntry(dysym, isec, r.offset, 
r.addend); } + // References from thread-local variable sections are treated as offsets + // relative to the start of the referent section, and therefore have no + // need of rebase opcodes. + if (!(isThreadLocalVariables(isec->flags) && isa<Defined>(sym))) + addNonLazyBindingEntries(sym, isec, r.offset, r.addend); break; } case X86_64_RELOC_SIGNED: case X86_64_RELOC_SIGNED_1: case X86_64_RELOC_SIGNED_2: case X86_64_RELOC_SIGNED_4: + // TODO: warn if they refer to a weak global break; + case X86_64_RELOC_TLV: { + if (needsBinding(sym)) + in.tlvPointers->addEntry(sym); + + if (!sym->isTlv()) + error( + "found X86_64_RELOC_TLV referencing a non-thread-local variable in " + + toString(isec)); + break; + } case X86_64_RELOC_SUBTRACTOR: - case X86_64_RELOC_TLV: fatal("TODO: handle relocation type " + std::to_string(r.type)); break; default: @@ -247,24 +285,43 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, } } -uint64_t X86_64::getSymbolVA(const lld::macho::Symbol &sym, - uint8_t type) const { +uint64_t X86_64::resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &sym, + uint8_t type) const { switch (type) { - case X86_64_RELOC_GOT_LOAD: + case X86_64_RELOC_GOT_LOAD: { + if (!sym.isInGot()) { + if (buf[-2] != 0x8b) + error("X86_64_RELOC_GOT_LOAD must be used with movq instructions"); + buf[-2] = 0x8d; + return sym.getVA(); + } + LLVM_FALLTHROUGH; + } case X86_64_RELOC_GOT: return in.got->addr + sym.gotIndex * WordSize; - case X86_64_RELOC_BRANCH: - if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) - return in.stubs->addr + dysym->stubsIndex * sizeof(stub); + case X86_64_RELOC_BRANCH: { + if (sym.isInStubs()) + return in.stubs->addr + sym.stubsIndex * sizeof(stub); return sym.getVA(); + } case X86_64_RELOC_UNSIGNED: case X86_64_RELOC_SIGNED: case X86_64_RELOC_SIGNED_1: case X86_64_RELOC_SIGNED_2: case X86_64_RELOC_SIGNED_4: return sym.getVA(); + case X86_64_RELOC_TLV: { + if (sym.isInGot()) + return in.tlvPointers->addr + sym.gotIndex * WordSize; + + // 
Convert the movq to a leaq. + assert(isa<Defined>(&sym)); + if (buf[-2] != 0x8b) + error("X86_64_RELOC_TLV must be used with movq instructions"); + buf[-2] = 0x8d; + return sym.getVA(); + } case X86_64_RELOC_SUBTRACTOR: - case X86_64_RELOC_TLV: fatal("TODO: handle relocation type " + std::to_string(type)); default: llvm_unreachable("Unexpected relocation type"); diff --git a/contrib/llvm-project/lld/MachO/Config.h b/contrib/llvm-project/lld/MachO/Config.h index 79812a433563..f6e1f134d974 100644 --- a/contrib/llvm-project/lld/MachO/Config.h +++ b/contrib/llvm-project/lld/MachO/Config.h @@ -12,7 +12,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/VersionTuple.h" #include "llvm/TextAPI/MachO/Architecture.h" +#include "llvm/TextAPI/MachO/Platform.h" #include <vector> @@ -22,16 +24,50 @@ namespace macho { class Symbol; struct SymbolPriorityEntry; +struct PlatformInfo { + llvm::MachO::PlatformKind kind; + llvm::VersionTuple minimum; + llvm::VersionTuple sdk; +}; + +enum class UndefinedSymbolTreatment { + unknown, + error, + warning, + suppress, + dynamic_lookup, +}; + struct Configuration { Symbol *entry; bool hasReexports = false; + bool allLoad = false; + bool forceLoadObjC = false; + bool staticLink = false; + bool implicitDylibs = false; + bool isPic = false; + bool headerPadMaxInstallNames = false; + bool ltoNewPassManager = LLVM_ENABLE_NEW_PASS_MANAGER; + bool printEachFile = false; + bool printWhyLoad = false; + bool searchDylibsFirst = false; + bool saveTemps = false; + uint32_t headerPad; + uint32_t dylibCompatibilityVersion = 0; + uint32_t dylibCurrentVersion = 0; llvm::StringRef installName; llvm::StringRef outputFile; + llvm::StringRef ltoObjPath; + bool demangle = false; llvm::MachO::Architecture arch; + PlatformInfo platform; + UndefinedSymbolTreatment undefinedSymbolTreatment = + UndefinedSymbolTreatment::error; llvm::MachO::HeaderFileType outputType; + 
std::vector<llvm::StringRef> systemLibraryRoots; std::vector<llvm::StringRef> librarySearchPaths; - // TODO: use the framework search paths std::vector<llvm::StringRef> frameworkSearchPaths; + std::vector<llvm::StringRef> runtimePaths; llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities; }; diff --git a/contrib/llvm-project/lld/MachO/Driver.cpp b/contrib/llvm-project/lld/MachO/Driver.cpp index 2a3b0042162e..1b337f38f7ba 100644 --- a/contrib/llvm-project/lld/MachO/Driver.cpp +++ b/contrib/llvm-project/lld/MachO/Driver.cpp @@ -9,10 +9,13 @@ #include "Driver.h" #include "Config.h" #include "InputFiles.h" +#include "LTO.h" +#include "ObjC.h" #include "OutputSection.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" +#include "SyntheticSections.h" #include "Target.h" #include "Writer.h" @@ -21,97 +24,121 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/LLVM.h" #include "lld/Common/Memory.h" +#include "lld/Common/Reproduce.h" #include "lld/Common/Version.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/BinaryFormat/Magic.h" +#include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Option/ArgList.h" -#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TarWriter.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/TextAPI/MachO/PackedVersion.h" + +#include <algorithm> using namespace llvm; using namespace llvm::MachO; -using namespace llvm::sys; +using namespace llvm::object; using namespace llvm::opt; +using namespace llvm::sys; using namespace lld; using namespace lld::macho; Configuration *lld::macho::config; -// Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) const char *NAME[] = VALUE; 
-#include "Options.inc" -#undef PREFIX - -// Create table mapping all options defined in Options.td -static const opt::OptTable::Info optInfo[] = { -#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \ - {X1, X2, X10, X11, OPT_##ID, opt::Option::KIND##Class, \ - X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12}, -#include "Options.inc" -#undef OPTION -}; - -MachOOptTable::MachOOptTable() : OptTable(optInfo) {} - -opt::InputArgList MachOOptTable::parse(ArrayRef<const char *> argv) { - // Make InputArgList from string vectors. - unsigned missingIndex; - unsigned missingCount; - SmallVector<const char *, 256> vec(argv.data(), argv.data() + argv.size()); - - opt::InputArgList args = ParseArgs(vec, missingIndex, missingCount); - - if (missingCount) - error(Twine(args.getArgString(missingIndex)) + ": missing argument"); - - for (opt::Arg *arg : args.filtered(OPT_UNKNOWN)) - error("unknown argument: " + arg->getSpelling()); - return args; +static HeaderFileType getOutputType(const opt::InputArgList &args) { + // TODO: -r, -dylinker, -preload... 
+ opt::Arg *outputArg = args.getLastArg(OPT_bundle, OPT_dylib, OPT_execute); + if (outputArg == nullptr) + return MH_EXECUTE; + + switch (outputArg->getOption().getID()) { + case OPT_bundle: + return MH_BUNDLE; + case OPT_dylib: + return MH_DYLIB; + case OPT_execute: + return MH_EXECUTE; + default: + llvm_unreachable("internal error"); + } } -void MachOOptTable::printHelp(const char *argv0, bool showHidden) const { - PrintHelp(lld::outs(), (std::string(argv0) + " [options] file...").c_str(), - "LLVM Linker", showHidden); - lld::outs() << "\n"; +static Optional<std::string> +findAlongPathsWithExtensions(StringRef name, ArrayRef<StringRef> extensions) { + SmallString<261> base; + for (StringRef dir : config->librarySearchPaths) { + base = dir; + path::append(base, Twine("lib") + name); + for (StringRef ext : extensions) { + Twine location = base + ext; + if (fs::exists(location)) + return location.str(); + } + } + return {}; } static Optional<std::string> findLibrary(StringRef name) { - std::string stub = (llvm::Twine("lib") + name + ".tbd").str(); - std::string shared = (llvm::Twine("lib") + name + ".dylib").str(); - std::string archive = (llvm::Twine("lib") + name + ".a").str(); - llvm::SmallString<260> location; + if (config->searchDylibsFirst) { + if (Optional<std::string> path = + findAlongPathsWithExtensions(name, {".tbd", ".dylib"})) + return path; + return findAlongPathsWithExtensions(name, {".a"}); + } + return findAlongPathsWithExtensions(name, {".tbd", ".dylib", ".a"}); +} - for (StringRef dir : config->librarySearchPaths) { - for (StringRef library : {stub, shared, archive}) { - location = dir; - llvm::sys::path::append(location, library); - if (fs::exists(location)) - return location.str().str(); +static Optional<std::string> findFramework(StringRef name) { + SmallString<260> symlink; + StringRef suffix; + std::tie(name, suffix) = name.split(","); + for (StringRef dir : config->frameworkSearchPaths) { + symlink = dir; + path::append(symlink, name + 
".framework", name); + + if (!suffix.empty()) { + // NOTE: we must resolve the symlink before trying the suffixes, because + // there are no symlinks for the suffixed paths. + SmallString<260> location; + if (!fs::real_path(symlink, location)) { + // only append suffix if realpath() succeeds + Twine suffixed = location + suffix; + if (fs::exists(suffixed)) + return suffixed.str(); + } + // Suffix lookup failed, fall through to the no-suffix case. } + + if (Optional<std::string> path = resolveDylibPath(symlink)) + return path; } return {}; } static TargetInfo *createTargetInfo(opt::InputArgList &args) { StringRef arch = args.getLastArgValue(OPT_arch, "x86_64"); - config->arch = llvm::MachO::getArchitectureFromName( + config->arch = MachO::getArchitectureFromName( args.getLastArgValue(OPT_arch, arch)); switch (config->arch) { - case llvm::MachO::AK_x86_64: - case llvm::MachO::AK_x86_64h: + case MachO::AK_x86_64: + case MachO::AK_x86_64h: return createX86_64TargetInfo(); default: fatal("missing or unsupported -arch " + arch); } } -static bool isDirectory(StringRef option, StringRef path) { +static bool warnIfNotDirectory(StringRef option, StringRef path) { if (!fs::exists(path)) { warn("directory not found for option -" + option + path); return false; @@ -122,40 +149,123 @@ static bool isDirectory(StringRef option, StringRef path) { return true; } -static void getSearchPaths(std::vector<StringRef> &paths, unsigned optionCode, - opt::InputArgList &args, - const SmallVector<StringRef, 2> &systemPaths) { - StringRef optionLetter{(optionCode == OPT_F ? "F" : "L")}; - for (auto const &path : args::getStrings(args, optionCode)) { - if (isDirectory(optionLetter, path)) +static std::vector<StringRef> +getSearchPaths(unsigned optionCode, opt::InputArgList &args, + const std::vector<StringRef> &roots, + const SmallVector<StringRef, 2> &systemPaths) { + std::vector<StringRef> paths; + StringRef optionLetter{optionCode == OPT_F ? 
"F" : "L"}; + for (StringRef path : args::getStrings(args, optionCode)) { + // NOTE: only absolute paths are re-rooted to syslibroot(s) + bool found = false; + if (path::is_absolute(path, path::Style::posix)) { + for (StringRef root : roots) { + SmallString<261> buffer(root); + path::append(buffer, path); + // Do not warn about paths that are computed via the syslib roots + if (fs::is_directory(buffer)) { + paths.push_back(saver.save(buffer.str())); + found = true; + } + } + } + if (!found && warnIfNotDirectory(optionLetter, path)) paths.push_back(path); } - if (!args.hasArg(OPT_Z) && Triple(sys::getProcessTriple()).isOSDarwin()) { - for (auto const &path : systemPaths) { - if (isDirectory(optionLetter, path)) - paths.push_back(path); + + // `-Z` suppresses the standard "system" search paths. + if (args.hasArg(OPT_Z)) + return paths; + + for (auto const &path : systemPaths) { + for (auto root : roots) { + SmallString<261> buffer(root); + path::append(buffer, path); + if (fs::is_directory(buffer)) + paths.push_back(saver.save(buffer.str())); } } + return paths; } -static void getLibrarySearchPaths(std::vector<StringRef> &paths, - opt::InputArgList &args) { - getSearchPaths(paths, OPT_L, args, {"/usr/lib", "/usr/local/lib"}); +static std::vector<StringRef> getSystemLibraryRoots(opt::InputArgList &args) { + std::vector<StringRef> roots; + for (const Arg *arg : args.filtered(OPT_syslibroot)) + roots.push_back(arg->getValue()); + // NOTE: the final `-syslibroot` being `/` will ignore all roots + if (roots.size() && roots.back() == "/") + roots.clear(); + // NOTE: roots can never be empty - add an empty root to simplify the library + // and framework search path computation. 
+ if (roots.empty()) + roots.emplace_back(""); + return roots; } -static void getFrameworkSearchPaths(std::vector<StringRef> &paths, - opt::InputArgList &args) { - getSearchPaths(paths, OPT_F, args, - {"/Library/Frameworks", "/System/Library/Frameworks"}); +static std::vector<StringRef> +getLibrarySearchPaths(opt::InputArgList &args, + const std::vector<StringRef> &roots) { + return getSearchPaths(OPT_L, args, roots, {"/usr/lib", "/usr/local/lib"}); } -static void addFile(StringRef path) { +static std::vector<StringRef> +getFrameworkSearchPaths(opt::InputArgList &args, + const std::vector<StringRef> &roots) { + return getSearchPaths(OPT_F, args, roots, + {"/Library/Frameworks", "/System/Library/Frameworks"}); +} + +namespace { +struct ArchiveMember { + MemoryBufferRef mbref; + uint32_t modTime; +}; +} // namespace + +// Returns slices of MB by parsing MB as an archive file. +// Each slice consists of a member file in the archive. +static std::vector<ArchiveMember> getArchiveMembers(MemoryBufferRef mb) { + std::unique_ptr<Archive> file = + CHECK(Archive::create(mb), + mb.getBufferIdentifier() + ": failed to parse archive"); + Archive *archive = file.get(); + make<std::unique_ptr<Archive>>(std::move(file)); // take ownership + + std::vector<ArchiveMember> v; + Error err = Error::success(); + + // Thin archives refer to .o files, so --reproduces needs the .o files too. 
+ bool addToTar = archive->isThin() && tar; + + for (const Archive::Child &c : archive->children(err)) { + MemoryBufferRef mbref = + CHECK(c.getMemoryBufferRef(), + mb.getBufferIdentifier() + + ": could not get the buffer for a child of the archive"); + if (addToTar) + tar->append(relativeToRoot(check(c.getFullName())), mbref.getBuffer()); + uint32_t modTime = toTimeT( + CHECK(c.getLastModified(), mb.getBufferIdentifier() + + ": could not get the modification " + "time for a child of the archive")); + v.push_back({mbref, modTime}); + } + if (err) + fatal(mb.getBufferIdentifier() + + ": Archive::children failed: " + toString(std::move(err))); + + return v; +} + +static InputFile *addFile(StringRef path, bool forceLoadArchive) { Optional<MemoryBufferRef> buffer = readFile(path); if (!buffer) - return; + return nullptr; MemoryBufferRef mbref = *buffer; + InputFile *newFile = nullptr; - switch (identify_magic(mbref.getBuffer())) { + auto magic = identify_magic(mbref.getBuffer()); + switch (magic) { case file_magic::archive: { std::unique_ptr<object::Archive> file = CHECK( object::Archive::create(mbref), path + ": failed to parse archive"); @@ -163,48 +273,141 @@ static void addFile(StringRef path) { if (!file->isEmpty() && !file->hasSymbolTable()) error(path + ": archive has no index; run ranlib to add one"); - inputFiles.push_back(make<ArchiveFile>(std::move(file))); + if (config->allLoad || forceLoadArchive) { + if (Optional<MemoryBufferRef> buffer = readFile(path)) { + for (const ArchiveMember &member : getArchiveMembers(*buffer)) { + inputFiles.insert(make<ObjFile>(member.mbref, member.modTime, path)); + printArchiveMemberLoad( + (forceLoadArchive ? 
"-force_load" : "-all_load"), + inputFiles.back()); + } + } + } else if (config->forceLoadObjC) { + for (const object::Archive::Symbol &sym : file->symbols()) + if (sym.getName().startswith(objc::klass)) + symtab->addUndefined(sym.getName(), /*isWeakRef=*/false); + + // TODO: no need to look for ObjC sections for a given archive member if + // we already found that it contains an ObjC symbol. We should also + // consider creating a LazyObjFile class in order to avoid double-loading + // these files here and below (as part of the ArchiveFile). + if (Optional<MemoryBufferRef> buffer = readFile(path)) { + for (const ArchiveMember &member : getArchiveMembers(*buffer)) { + if (hasObjCSection(member.mbref)) { + inputFiles.insert( + make<ObjFile>(member.mbref, member.modTime, path)); + printArchiveMemberLoad("-ObjC", inputFiles.back()); + } + } + } + } + + newFile = make<ArchiveFile>(std::move(file)); break; } case file_magic::macho_object: - inputFiles.push_back(make<ObjFile>(mbref)); + newFile = make<ObjFile>(mbref, getModTime(path), ""); break; case file_magic::macho_dynamically_linked_shared_lib: - inputFiles.push_back(make<DylibFile>(mbref)); - break; + case file_magic::macho_dynamically_linked_shared_lib_stub: case file_magic::tapi_file: { - llvm::Expected<std::unique_ptr<llvm::MachO::InterfaceFile>> result = - TextAPIReader::get(mbref); - if (!result) - return; - - inputFiles.push_back(make<DylibFile>(std::move(*result))); + if (Optional<DylibFile *> dylibFile = loadDylib(mbref)) + newFile = *dylibFile; break; } + case file_magic::bitcode: + newFile = make<BitcodeFile>(mbref); + break; default: error(path + ": unhandled file type"); } + if (newFile) { + // printArchiveMemberLoad() prints both .a and .o names, so no need to + // print the .a name here. 
+ if (config->printEachFile && magic != file_magic::archive) + message(toString(newFile)); + inputFiles.insert(newFile); + } + return newFile; +} + +static void addLibrary(StringRef name, bool isWeak) { + if (Optional<std::string> path = findLibrary(name)) { + auto *dylibFile = dyn_cast_or_null<DylibFile>(addFile(*path, false)); + if (isWeak && dylibFile) + dylibFile->forceWeakImport = true; + return; + } + error("library not found for -l" + name); +} + +static void addFramework(StringRef name, bool isWeak) { + if (Optional<std::string> path = findFramework(name)) { + auto *dylibFile = dyn_cast_or_null<DylibFile>(addFile(*path, false)); + if (isWeak && dylibFile) + dylibFile->forceWeakImport = true; + return; + } + error("framework not found for -framework " + name); +} + +// Parses LC_LINKER_OPTION contents, which can add additional command line flags. +void macho::parseLCLinkerOption(InputFile* f, unsigned argc, StringRef data) { + SmallVector<const char *, 4> argv; + size_t offset = 0; + for (unsigned i = 0; i < argc && offset < data.size(); ++i) { + argv.push_back(data.data() + offset); + offset += strlen(data.data() + offset) + 1; + } + if (argv.size() != argc || offset > data.size()) + fatal(toString(f) + ": invalid LC_LINKER_OPTION"); + + MachOOptTable table; + unsigned missingIndex, missingCount; + opt::InputArgList args = table.ParseArgs(argv, missingIndex, missingCount); + if (missingCount) + fatal(Twine(args.getArgString(missingIndex)) + ": missing argument"); + for (auto *arg : args.filtered(OPT_UNKNOWN)) + error("unknown argument: " + arg->getAsString(args)); + + for (auto *arg : args) { + switch (arg->getOption().getID()) { + case OPT_l: + addLibrary(arg->getValue(), false); + break; + case OPT_framework: + addFramework(arg->getValue(), false); + break; + default: + error(arg->getSpelling() + " is not allowed in LC_LINKER_OPTION"); + } + } } -static std::array<StringRef, 6> archNames{"arm", "arm64", "i386", - "x86_64", "ppc", "ppc64"}; -static bool 
isArchString(StringRef s) { - static DenseSet<StringRef> archNamesSet(archNames.begin(), archNames.end()); - return archNamesSet.find(s) != archNamesSet.end(); +static void addFileList(StringRef path) { + Optional<MemoryBufferRef> buffer = readFile(path); + if (!buffer) + return; + MemoryBufferRef mbref = *buffer; + for (StringRef path : args::getLines(mbref)) + addFile(path, false); } // An order file has one entry per line, in the following format: // -// <arch>:<object file>:<symbol name> +// <cpu>:<object file>:<symbol name> // -// <arch> and <object file> are optional. If not specified, then that entry -// matches any symbol of that name. +// <cpu> and <object file> are optional. If not specified, then that entry +// matches any symbol of that name. Parsing this format is not quite +// straightforward because the symbol name itself can contain colons, so when +// encountering a colon, we consider the preceding characters to decide if it +// can be a valid CPU type or file path. // // If a symbol is matched by multiple entries, then it takes the lowest-ordered // entry (the one nearest to the front of the list.) // // The file can also have line comments that start with '#'. 
-void parseOrderFile(StringRef path) { +static void parseOrderFile(StringRef path) { Optional<MemoryBufferRef> buffer = readFile(path); if (!buffer) { error("Could not read order file at " + path); @@ -213,59 +416,37 @@ void parseOrderFile(StringRef path) { MemoryBufferRef mbref = *buffer; size_t priority = std::numeric_limits<size_t>::max(); - for (StringRef rest : args::getLines(mbref)) { - StringRef arch, objectFile, symbol; - - std::array<StringRef, 3> fields; - uint8_t fieldCount = 0; - while (rest != "" && fieldCount < 3) { - std::pair<StringRef, StringRef> p = getToken(rest, ": \t\n\v\f\r"); - StringRef tok = p.first; - rest = p.second; - - // Check if we have a comment - if (tok == "" || tok[0] == '#') - break; - - fields[fieldCount++] = tok; - } - - switch (fieldCount) { - case 3: - arch = fields[0]; - objectFile = fields[1]; - symbol = fields[2]; - break; - case 2: - (isArchString(fields[0]) ? arch : objectFile) = fields[0]; - symbol = fields[1]; - break; - case 1: - symbol = fields[0]; - break; - case 0: - break; - default: - llvm_unreachable("too many fields in order file"); - } + for (StringRef line : args::getLines(mbref)) { + StringRef objectFile, symbol; + line = line.take_until([](char c) { return c == '#'; }); // ignore comments + line = line.ltrim(); + + CPUType cpuType = StringSwitch<CPUType>(line) + .StartsWith("i386:", CPU_TYPE_I386) + .StartsWith("x86_64:", CPU_TYPE_X86_64) + .StartsWith("arm:", CPU_TYPE_ARM) + .StartsWith("arm64:", CPU_TYPE_ARM64) + .StartsWith("ppc:", CPU_TYPE_POWERPC) + .StartsWith("ppc64:", CPU_TYPE_POWERPC64) + .Default(CPU_TYPE_ANY); + // Drop the CPU type as well as the colon + if (cpuType != CPU_TYPE_ANY) + line = line.drop_until([](char c) { return c == ':'; }).drop_front(); + // TODO: Update when we extend support for other CPUs + if (cpuType != CPU_TYPE_ANY && cpuType != CPU_TYPE_X86_64) + continue; - if (!arch.empty()) { - if (!isArchString(arch)) { - error("invalid arch \"" + arch + "\" in order file: expected 
one of " + - llvm::join(archNames, ", ")); - continue; + constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"}; + for (StringRef fileEnd : fileEnds) { + size_t pos = line.find(fileEnd); + if (pos != StringRef::npos) { + // Split the string around the colon + objectFile = line.take_front(pos + fileEnd.size() - 1); + line = line.drop_front(pos + fileEnd.size()); + break; } - - // TODO: Update when we extend support for other archs - if (arch != "x86_64") - continue; - } - - if (!objectFile.empty() && !objectFile.endswith(".o")) { - error("invalid object file name \"" + objectFile + - "\" in order file: should end with .o"); - continue; } + symbol = line.trim(); if (!symbol.empty()) { SymbolPriorityEntry &entry = config->priorities[symbol]; @@ -280,12 +461,16 @@ void parseOrderFile(StringRef path) { } // We expect sub-library names of the form "libfoo", which will match a dylib -// with a path of .*/libfoo.dylib. -static bool markSubLibrary(StringRef searchName) { +// with a path of .*/libfoo.{dylib, tbd}. +// XXX ld64 seems to ignore the extension entirely when matching sub-libraries; +// I'm not sure what the use case for that is. +static bool markReexport(StringRef searchName, ArrayRef<StringRef> extensions) { for (InputFile *file : inputFiles) { if (auto *dylibFile = dyn_cast<DylibFile>(file)) { StringRef filename = path::filename(dylibFile->getName()); - if (filename.consume_front(searchName) && filename == ".dylib") { + if (filename.consume_front(searchName) && + (filename.empty() || + find(extensions, filename) != extensions.end())) { dylibFile->reexport = true; return true; } @@ -294,8 +479,113 @@ static bool markSubLibrary(StringRef searchName) { return false; } +// This function is called on startup. We need this for LTO since +// LTO calls LLVM functions to compile bitcode files to native code. +// Technically this can be delayed until we read bitcode files, but +// we don't bother to do lazily because the initialization is fast. 
+static void initLLVM() { + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); +} + +static void compileBitcodeFiles() { + auto lto = make<BitcodeCompiler>(); + for (InputFile *file : inputFiles) + if (auto *bitcodeFile = dyn_cast<BitcodeFile>(file)) + lto->add(*bitcodeFile); + + for (ObjFile *file : lto->compile()) + inputFiles.insert(file); +} + +// Replaces common symbols with defined symbols residing in __common sections. +// This function must be called after all symbol names are resolved (i.e. after +// all InputFiles have been loaded.) As a result, later operations won't see +// any CommonSymbols. +static void replaceCommonSymbols() { + for (macho::Symbol *sym : symtab->getSymbols()) { + auto *common = dyn_cast<CommonSymbol>(sym); + if (common == nullptr) + continue; + + auto *isec = make<InputSection>(); + isec->file = common->file; + isec->name = section_names::common; + isec->segname = segment_names::data; + isec->align = common->align; + // Casting to size_t will truncate large values on 32-bit architectures, + // but it's not really worth supporting the linking of 64-bit programs on + // 32-bit archs. + isec->data = {nullptr, static_cast<size_t>(common->size)}; + isec->flags = S_ZEROFILL; + inputSections.push_back(isec); + + replaceSymbol<Defined>(sym, sym->getName(), isec, /*value=*/0, + /*isWeakDef=*/false, + /*isExternal=*/true, common->privateExtern); + } +} + +static inline char toLowerDash(char x) { + if (x >= 'A' && x <= 'Z') + return x - 'A' + 'a'; + else if (x == ' ') + return '-'; + return x; +} + +static std::string lowerDash(StringRef s) { + return std::string(map_iterator(s.begin(), toLowerDash), + map_iterator(s.end(), toLowerDash)); +} + static void handlePlatformVersion(const opt::Arg *arg) { - // TODO: implementation coming very soon ... 
+ StringRef platformStr = arg->getValue(0); + StringRef minVersionStr = arg->getValue(1); + StringRef sdkVersionStr = arg->getValue(2); + + // TODO(compnerd) see if we can generate this case list via XMACROS + config->platform.kind = + StringSwitch<PlatformKind>(lowerDash(platformStr)) + .Cases("macos", "1", PlatformKind::macOS) + .Cases("ios", "2", PlatformKind::iOS) + .Cases("tvos", "3", PlatformKind::tvOS) + .Cases("watchos", "4", PlatformKind::watchOS) + .Cases("bridgeos", "5", PlatformKind::bridgeOS) + .Cases("mac-catalyst", "6", PlatformKind::macCatalyst) + .Cases("ios-simulator", "7", PlatformKind::iOSSimulator) + .Cases("tvos-simulator", "8", PlatformKind::tvOSSimulator) + .Cases("watchos-simulator", "9", PlatformKind::watchOSSimulator) + .Cases("driverkit", "10", PlatformKind::driverKit) + .Default(PlatformKind::unknown); + if (config->platform.kind == PlatformKind::unknown) + error(Twine("malformed platform: ") + platformStr); + // TODO: check validity of version strings, which varies by platform + // NOTE: ld64 accepts version strings with 5 components + // llvm::VersionTuple accepts no more than 4 components + // Has Apple ever published version strings with 5 components? 
+ if (config->platform.minimum.tryParse(minVersionStr)) + error(Twine("malformed minimum version: ") + minVersionStr); + if (config->platform.sdk.tryParse(sdkVersionStr)) + error(Twine("malformed sdk version: ") + sdkVersionStr); +} + +static void handleUndefined(const opt::Arg *arg) { + StringRef treatmentStr = arg->getValue(0); + config->undefinedSymbolTreatment = + StringSwitch<UndefinedSymbolTreatment>(treatmentStr) + .Case("error", UndefinedSymbolTreatment::error) + .Case("warning", UndefinedSymbolTreatment::warning) + .Case("suppress", UndefinedSymbolTreatment::suppress) + .Case("dynamic_lookup", UndefinedSymbolTreatment::dynamic_lookup) + .Default(UndefinedSymbolTreatment::unknown); + if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::unknown) { + warn(Twine("unknown -undefined TREATMENT '") + treatmentStr + + "', defaulting to 'error'"); + config->undefinedSymbolTreatment = UndefinedSymbolTreatment::error; + } } static void warnIfDeprecatedOption(const opt::Option &opt) { @@ -308,7 +598,7 @@ static void warnIfDeprecatedOption(const opt::Option &opt) { } static void warnIfUnimplementedOption(const opt::Option &opt) { - if (!opt.getGroup().isValid()) + if (!opt.getGroup().isValid() || !opt.hasFlag(DriverFlag::HelpHidden)) return; switch (opt.getGroup().getID()) { case OPT_grp_deprecated: @@ -332,7 +622,62 @@ static void warnIfUnimplementedOption(const opt::Option &opt) { } } -bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly, +static const char *getReproduceOption(opt::InputArgList &args) { + if (auto *arg = args.getLastArg(OPT_reproduce)) + return arg->getValue(); + return getenv("LLD_REPRODUCE"); +} + +static bool isPie(opt::InputArgList &args) { + if (config->outputType != MH_EXECUTE || args.hasArg(OPT_no_pie)) + return false; + + // TODO: add logic here as we support more archs. E.g. 
i386 should default + // to PIE from 10.7, arm64 should always be PIE, etc + assert(config->arch == AK_x86_64 || config->arch == AK_x86_64h); + + PlatformKind kind = config->platform.kind; + if (kind == PlatformKind::macOS && + config->platform.minimum >= VersionTuple(10, 6)) + return true; + + if (kind == PlatformKind::iOSSimulator || kind == PlatformKind::driverKit) + return true; + + return args.hasArg(OPT_pie); +} + +static void parseClangOption(StringRef opt, const Twine &msg) { + std::string err; + raw_string_ostream os(err); + + const char *argv[] = {"lld", opt.data()}; + if (cl::ParseCommandLineOptions(2, argv, "", &os)) + return; + os.flush(); + error(msg + ": " + StringRef(err).trim()); +} + +static uint32_t parseDylibVersion(const opt::ArgList& args, unsigned id) { + const opt::Arg *arg = args.getLastArg(id); + if (!arg) + return 0; + + if (config->outputType != MH_DYLIB) { + error(arg->getAsString(args) + ": only valid with -dylib"); + return 0; + } + + PackedVersion version; + if (!version.parse32(arg->getValue())) { + error(arg->getAsString(args) + ": malformed version"); + return 0; + } + + return version.rawValue(); +} + +bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly, raw_ostream &stdoutOS, raw_ostream &stderrOS) { lld::stdoutOS = &stdoutOS; lld::stderrOS = &stderrOS; @@ -340,95 +685,188 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly, stderrOS.enable_colors(stderrOS.has_colors()); // TODO: Set up error handler properly, e.g. 
the errorLimitExceededMsg + errorHandler().cleanupCallback = []() { freeArena(); }; + MachOOptTable parser; opt::InputArgList args = parser.parse(argsArr.slice(1)); if (args.hasArg(OPT_help_hidden)) { parser.printHelp(argsArr[0], /*showHidden=*/true); return true; - } else if (args.hasArg(OPT_help)) { + } + if (args.hasArg(OPT_help)) { parser.printHelp(argsArr[0], /*showHidden=*/false); return true; } + if (args.hasArg(OPT_version)) { + message(getLLDVersion()); + return true; + } + + if (const char *path = getReproduceOption(args)) { + // Note that --reproduce is a debug option so you can ignore it + // if you are trying to understand the whole picture of the code. + Expected<std::unique_ptr<TarWriter>> errOrWriter = + TarWriter::create(path, path::stem(path)); + if (errOrWriter) { + tar = std::move(*errOrWriter); + tar->append("response.txt", createResponseFile(args)); + tar->append("version.txt", getLLDVersion() + "\n"); + } else { + error("--reproduce: " + toString(errOrWriter.takeError())); + } + } config = make<Configuration>(); symtab = make<SymbolTable>(); target = createTargetInfo(args); - config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main")); + config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"), + /*isWeakRef=*/false); config->outputFile = args.getLastArgValue(OPT_o, "a.out"); config->installName = args.getLastArgValue(OPT_install_name, config->outputFile); - getLibrarySearchPaths(config->librarySearchPaths, args); - getFrameworkSearchPaths(config->frameworkSearchPaths, args); - config->outputType = args.hasArg(OPT_dylib) ? 
MH_DYLIB : MH_EXECUTE; + config->headerPad = args::getHex(args, OPT_headerpad, /*Default=*/32); + config->headerPadMaxInstallNames = + args.hasArg(OPT_headerpad_max_install_names); + config->printEachFile = args.hasArg(OPT_t); + config->printWhyLoad = args.hasArg(OPT_why_load); + config->outputType = getOutputType(args); + config->ltoObjPath = args.getLastArgValue(OPT_object_path_lto); + config->ltoNewPassManager = + args.hasFlag(OPT_no_lto_legacy_pass_manager, OPT_lto_legacy_pass_manager, + LLVM_ENABLE_NEW_PASS_MANAGER); + config->runtimePaths = args::getStrings(args, OPT_rpath); + config->allLoad = args.hasArg(OPT_all_load); + config->forceLoadObjC = args.hasArg(OPT_ObjC); + config->demangle = args.hasArg(OPT_demangle); + config->implicitDylibs = !args.hasArg(OPT_no_implicit_dylibs); + + if (const opt::Arg *arg = args.getLastArg(OPT_static, OPT_dynamic)) + config->staticLink = (arg->getOption().getID() == OPT_static); + + config->systemLibraryRoots = getSystemLibraryRoots(args); + config->librarySearchPaths = + getLibrarySearchPaths(args, config->systemLibraryRoots); + config->frameworkSearchPaths = + getFrameworkSearchPaths(args, config->systemLibraryRoots); + if (const opt::Arg *arg = + args.getLastArg(OPT_search_paths_first, OPT_search_dylibs_first)) + config->searchDylibsFirst = + arg->getOption().getID() == OPT_search_dylibs_first; + + config->dylibCompatibilityVersion = + parseDylibVersion(args, OPT_compatibility_version); + config->dylibCurrentVersion = parseDylibVersion(args, OPT_current_version); + + config->saveTemps = args.hasArg(OPT_save_temps); if (args.hasArg(OPT_v)) { message(getLLDVersion()); message(StringRef("Library search paths:") + (config->librarySearchPaths.size() - ? "\n\t" + llvm::join(config->librarySearchPaths, "\n\t") + ? "\n\t" + join(config->librarySearchPaths, "\n\t") : "")); message(StringRef("Framework search paths:") + (config->frameworkSearchPaths.size() - ? "\n\t" + llvm::join(config->frameworkSearchPaths, "\n\t") + ? 
"\n\t" + join(config->frameworkSearchPaths, "\n\t") : "")); freeArena(); return !errorCount(); } + initLLVM(); // must be run before any call to addFile() + for (const auto &arg : args) { const auto &opt = arg->getOption(); warnIfDeprecatedOption(opt); - switch (arg->getOption().getID()) { + warnIfUnimplementedOption(opt); + // TODO: are any of these better handled via filtered() or getLastArg()? + switch (opt.getID()) { case OPT_INPUT: - addFile(arg->getValue()); + addFile(arg->getValue(), false); break; - case OPT_l: { - StringRef name = arg->getValue(); - if (Optional<std::string> path = findLibrary(name)) { - addFile(*path); - break; - } - error("library not found for -l" + name); + case OPT_weak_library: { + auto *dylibFile = + dyn_cast_or_null<DylibFile>(addFile(arg->getValue(), false)); + if (dylibFile) + dylibFile->forceWeakImport = true; break; } + case OPT_filelist: + addFileList(arg->getValue()); + break; + case OPT_force_load: + addFile(arg->getValue(), true); + break; + case OPT_l: + case OPT_weak_l: + addLibrary(arg->getValue(), opt.getID() == OPT_weak_l); + break; + case OPT_framework: + case OPT_weak_framework: + addFramework(arg->getValue(), opt.getID() == OPT_weak_framework); + break; case OPT_platform_version: handlePlatformVersion(arg); break; - case OPT_o: - case OPT_dylib: - case OPT_e: - case OPT_L: - case OPT_Z: - case OPT_arch: - // handled elsewhere + case OPT_undefined: + handleUndefined(arg); break; default: - warnIfUnimplementedOption(opt); break; } } + config->isPic = config->outputType == MH_DYLIB || + config->outputType == MH_BUNDLE || isPie(args); + // Now that all dylibs have been loaded, search for those that should be // re-exported. 
- for (opt::Arg *arg : args.filtered(OPT_sub_library)) { + for (opt::Arg *arg : args.filtered(OPT_sub_library, OPT_sub_umbrella)) { config->hasReexports = true; StringRef searchName = arg->getValue(); - if (!markSubLibrary(searchName)) - error("-sub_library " + searchName + " does not match a supplied dylib"); + std::vector<StringRef> extensions; + if (arg->getOption().getID() == OPT_sub_library) + extensions = {".dylib", ".tbd"}; + else + extensions = {".tbd"}; + if (!markReexport(searchName, extensions)) + error(arg->getSpelling() + " " + searchName + + " does not match a supplied dylib"); } + // Parse LTO options. + if (auto *arg = args.getLastArg(OPT_mcpu)) + parseClangOption(saver.save("-mcpu=" + StringRef(arg->getValue())), + arg->getSpelling()); + + for (auto *arg : args.filtered(OPT_mllvm)) + parseClangOption(arg->getValue(), arg->getSpelling()); + + compileBitcodeFiles(); + replaceCommonSymbols(); + StringRef orderFile = args.getLastArgValue(OPT_order_file); if (!orderFile.empty()) parseOrderFile(orderFile); - if (config->outputType == MH_EXECUTE && !isa<Defined>(config->entry)) { - error("undefined symbol: " + config->entry->getName()); + if (config->outputType == MH_EXECUTE && isa<Undefined>(config->entry)) { + error("undefined symbol: " + toString(*config->entry)); return false; } createSyntheticSections(); + symtab->addDSOHandle(in.header); + + for (opt::Arg *arg : args.filtered(OPT_sectcreate)) { + StringRef segName = arg->getValue(0); + StringRef sectName = arg->getValue(1); + StringRef fileName = arg->getValue(2); + Optional<MemoryBufferRef> buffer = readFile(fileName); + if (buffer) + inputFiles.insert(make<OpaqueFile>(*buffer, segName, sectName)); + } // Initialize InputSections. for (InputFile *file : inputFiles) { @@ -446,6 +884,5 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly, if (canExitEarly) exitLld(errorCount() ? 
1 : 0); - freeArena(); return !errorCount(); } diff --git a/contrib/llvm-project/lld/MachO/Driver.h b/contrib/llvm-project/lld/MachO/Driver.h index 2233740d1db8..97b897f0271b 100644 --- a/contrib/llvm-project/lld/MachO/Driver.h +++ b/contrib/llvm-project/lld/MachO/Driver.h @@ -10,11 +10,17 @@ #define LLD_MACHO_DRIVER_H #include "lld/Common/LLVM.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Option/OptTable.h" +#include "llvm/Support/MemoryBuffer.h" namespace lld { namespace macho { +class DylibFile; +class InputFile; + class MachOOptTable : public llvm::opt::OptTable { public: MachOOptTable(); @@ -30,6 +36,20 @@ enum { #undef OPTION }; +void parseLCLinkerOption(InputFile*, unsigned argc, StringRef data); + +std::string createResponseFile(const llvm::opt::InputArgList &args); + +// Check for both libfoo.dylib and libfoo.tbd (in that order). +llvm::Optional<std::string> resolveDylibPath(llvm::StringRef path); + +llvm::Optional<DylibFile *> loadDylib(llvm::MemoryBufferRef mbref, + DylibFile *umbrella = nullptr); + +uint32_t getModTime(llvm::StringRef path); + +void printArchiveMemberLoad(StringRef reason, const InputFile *); + } // namespace macho } // namespace lld diff --git a/contrib/llvm-project/lld/MachO/DriverUtils.cpp b/contrib/llvm-project/lld/MachO/DriverUtils.cpp new file mode 100644 index 000000000000..563ae266735d --- /dev/null +++ b/contrib/llvm-project/lld/MachO/DriverUtils.cpp @@ -0,0 +1,214 @@ +//===- DriverUtils.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Config.h" +#include "InputFiles.h" + +#include "lld/Common/Args.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" +#include "lld/Common/Reproduce.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" +#include "llvm/TextAPI/MachO/TextAPIReader.h" + +using namespace llvm; +using namespace llvm::MachO; +using namespace llvm::opt; +using namespace llvm::sys; +using namespace lld; +using namespace lld::macho; + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const opt::OptTable::Info optInfo[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \ + {X1, X2, X10, X11, OPT_##ID, opt::Option::KIND##Class, \ + X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12}, +#include "Options.inc" +#undef OPTION +}; + +MachOOptTable::MachOOptTable() : OptTable(optInfo) {} + +// Set color diagnostics according to --color-diagnostics={auto,always,never} +// or --no-color-diagnostics flags. 
+static void handleColorDiagnostics(opt::InputArgList &args) { + auto *arg = args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq, + OPT_no_color_diagnostics); + if (!arg) + return; + if (arg->getOption().getID() == OPT_color_diagnostics) { + lld::errs().enable_colors(true); + } else if (arg->getOption().getID() == OPT_no_color_diagnostics) { + lld::errs().enable_colors(false); + } else { + StringRef s = arg->getValue(); + if (s == "always") + lld::errs().enable_colors(true); + else if (s == "never") + lld::errs().enable_colors(false); + else if (s != "auto") + error("unknown option: --color-diagnostics=" + s); + } +} + +opt::InputArgList MachOOptTable::parse(ArrayRef<const char *> argv) { + // Make InputArgList from string vectors. + unsigned missingIndex; + unsigned missingCount; + SmallVector<const char *, 256> vec(argv.data(), argv.data() + argv.size()); + + // Expand response files (arguments in the form of @<filename>) + // and then parse the argument again. + cl::ExpandResponseFiles(saver, cl::TokenizeGNUCommandLine, vec); + opt::InputArgList args = ParseArgs(vec, missingIndex, missingCount); + + // Handle -fatal_warnings early since it converts missing argument warnings + // to errors. 
+ errorHandler().fatalWarnings = args.hasArg(OPT_fatal_warnings); + + if (missingCount) + error(Twine(args.getArgString(missingIndex)) + ": missing argument"); + + handleColorDiagnostics(args); + + for (opt::Arg *arg : args.filtered(OPT_UNKNOWN)) { + std::string nearest; + if (findNearest(arg->getAsString(args), nearest) > 1) + error("unknown argument '" + arg->getAsString(args) + "'"); + else + error("unknown argument '" + arg->getAsString(args) + + "', did you mean '" + nearest + "'"); + } + return args; +} + +void MachOOptTable::printHelp(const char *argv0, bool showHidden) const { + PrintHelp(lld::outs(), (std::string(argv0) + " [options] file...").c_str(), + "LLVM Linker", showHidden); + lld::outs() << "\n"; +} + +static std::string rewritePath(StringRef s) { + if (fs::exists(s)) + return relativeToRoot(s); + return std::string(s); +} + +// Reconstructs command line arguments so that you can re-run +// the same command with the same inputs. This is for --reproduce. +std::string macho::createResponseFile(const opt::InputArgList &args) { + SmallString<0> data; + raw_svector_ostream os(data); + + // Copy the command line to the output while rewriting paths.
+ for (auto *arg : args) { + switch (arg->getOption().getID()) { + case OPT_reproduce: + break; + case OPT_INPUT: + os << quote(rewritePath(arg->getValue())) << "\n"; + break; + case OPT_o: + os << "-o " << quote(path::filename(arg->getValue())) << "\n"; + break; + case OPT_filelist: + if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue())) + for (StringRef path : args::getLines(*buffer)) + os << quote(rewritePath(path)) << "\n"; + break; + case OPT_force_load: + case OPT_rpath: + case OPT_syslibroot: + case OPT_F: + case OPT_L: + case OPT_order_file: + os << arg->getSpelling() << " " << quote(rewritePath(arg->getValue())) + << "\n"; + break; + case OPT_sectcreate: + os << arg->getSpelling() << " " << quote(arg->getValue(0)) << " " + << quote(arg->getValue(1)) << " " + << quote(rewritePath(arg->getValue(2))) << "\n"; + break; + default: + os << toString(*arg) << "\n"; + } + } + return std::string(data.str()); +} + +Optional<std::string> macho::resolveDylibPath(StringRef path) { + // TODO: if a tbd and dylib are both present, we should check to make sure + // they are consistent. + if (fs::exists(path)) + return std::string(path); + + SmallString<261> location = path; + path::replace_extension(location, ".tbd"); + if (fs::exists(location)) + return std::string(location); + + return {}; +} + +// It's not uncommon to have multiple attempts to load a single dylib, +// especially if it's a commonly re-exported core library. 
+static DenseMap<CachedHashStringRef, DylibFile *> loadedDylibs; + +Optional<DylibFile *> macho::loadDylib(MemoryBufferRef mbref, + DylibFile *umbrella) { + StringRef path = mbref.getBufferIdentifier(); + DylibFile *&file = loadedDylibs[CachedHashStringRef(path)]; + if (file) + return file; + + file_magic magic = identify_magic(mbref.getBuffer()); + if (magic == file_magic::tapi_file) { + Expected<std::unique_ptr<InterfaceFile>> result = TextAPIReader::get(mbref); + if (!result) { + error("could not load TAPI file at " + mbref.getBufferIdentifier() + + ": " + toString(result.takeError())); + return {}; + } + file = make<DylibFile>(**result, umbrella); + } else { + assert(magic == file_magic::macho_dynamically_linked_shared_lib || + magic == file_magic::macho_dynamically_linked_shared_lib_stub); + file = make<DylibFile>(mbref, umbrella); + } + return file; +} + +uint32_t macho::getModTime(StringRef path) { + fs::file_status stat; + if (!fs::status(path, stat)) + if (fs::exists(stat)) + return toTimeT(stat.getLastModificationTime()); + + warn("failed to get modification time of " + path); + return 0; +} + +void macho::printArchiveMemberLoad(StringRef reason, const InputFile *f) { + if (config->printEachFile) + message(toString(f)); + if (config->printWhyLoad) + message(reason + " forced load of " + toString(f)); +} diff --git a/contrib/llvm-project/lld/MachO/Dwarf.cpp b/contrib/llvm-project/lld/MachO/Dwarf.cpp new file mode 100644 index 000000000000..3e794922ad1d --- /dev/null +++ b/contrib/llvm-project/lld/MachO/Dwarf.cpp @@ -0,0 +1,42 @@ +//===- DWARF.cpp ----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Dwarf.h" +#include "InputFiles.h" +#include "InputSection.h" +#include "OutputSegment.h" + +#include <memory> + +using namespace lld; +using namespace lld::macho; +using namespace llvm; + +std::unique_ptr<DwarfObject> DwarfObject::create(ObjFile *obj) { + auto dObj = std::make_unique<DwarfObject>(); + bool hasDwarfInfo = false; + // LLD only needs to extract the source file path from the debug info, so we + // initialize DwarfObject with just the sections necessary to get that path. + // The debugger will locate the debug info via the object file paths that we + // emit in our STABS symbols, so we don't need to process & emit them + // ourselves. + for (InputSection *isec : obj->debugSections) { + if (StringRef *s = StringSwitch<StringRef *>(isec->name) + .Case("__debug_info", &dObj->infoSection.Data) + .Case("__debug_abbrev", &dObj->abbrevSection) + .Case("__debug_str", &dObj->strSection) + .Default(nullptr)) { + *s = toStringRef(isec->data); + hasDwarfInfo = true; + } + } + + if (hasDwarfInfo) + return dObj; + return nullptr; +} diff --git a/contrib/llvm-project/lld/MachO/Dwarf.h b/contrib/llvm-project/lld/MachO/Dwarf.h new file mode 100644 index 000000000000..119f2778fc6b --- /dev/null +++ b/contrib/llvm-project/lld/MachO/Dwarf.h @@ -0,0 +1,53 @@ +//===- DWARF.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-------------------------------------------------------------------===// + +#ifndef LLD_MACHO_DWARF_H +#define LLD_MACHO_DWARF_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFObject.h" + +namespace lld { +namespace macho { + +class ObjFile; + +// Implements the interface between LLVM's DWARF-parsing utilities and LLD's +// InputSection structures. +class DwarfObject final : public llvm::DWARFObject { +public: + bool isLittleEndian() const override { return true; } + + llvm::Optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &sec, + uint64_t pos) const override { + // TODO: implement this + return llvm::None; + } + + void forEachInfoSections( + llvm::function_ref<void(const llvm::DWARFSection &)> f) const override { + f(infoSection); + } + + llvm::StringRef getAbbrevSection() const override { return abbrevSection; } + llvm::StringRef getStrSection() const override { return strSection; } + + // Returns an instance of DwarfObject if the given object file has the + // relevant DWARF debug sections. + static std::unique_ptr<DwarfObject> create(ObjFile *); + +private: + llvm::DWARFSection infoSection; + llvm::StringRef abbrevSection; + llvm::StringRef strSection; +}; + +} // namespace macho +} // namespace lld + +#endif diff --git a/contrib/llvm-project/lld/MachO/ExportTrie.cpp b/contrib/llvm-project/lld/MachO/ExportTrie.cpp index 7cc81bcfd5f1..bd0c0004309c 100644 --- a/contrib/llvm-project/lld/MachO/ExportTrie.cpp +++ b/contrib/llvm-project/lld/MachO/ExportTrie.cpp @@ -59,7 +59,22 @@ struct Edge { struct ExportInfo { uint64_t address; - // TODO: Add proper support for re-exports & stub-and-resolver flags. + uint8_t flags = 0; + ExportInfo(const Symbol &sym, uint64_t imageBase) + : address(sym.getVA() - imageBase) { + // Set the symbol type. 
+ if (sym.isWeakDef()) + flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + // TODO: Add proper support for re-exports & stub-and-resolver flags. + + // Set the symbol kind. + if (sym.isTlv()) { + flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL; + } else if (auto *defined = dyn_cast<Defined>(&sym)) { + if (defined->isAbsolute()) + flags |= EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE; + } + } }; } // namespace @@ -83,9 +98,8 @@ bool TrieNode::updateOffset(size_t &nextOffset) { // node. size_t nodeSize; if (info) { - uint64_t flags = 0; uint32_t terminalSize = - getULEB128Size(flags) + getULEB128Size(info->address); + getULEB128Size(info->flags) + getULEB128Size(info->address); // Overall node size so far is the uleb128 size of the length of the symbol // info + the symbol info itself. nodeSize = terminalSize + getULEB128Size(terminalSize); @@ -110,11 +124,10 @@ void TrieNode::writeTo(uint8_t *buf) const { buf += offset; if (info) { // TrieNodes with Symbol info: size, flags address - uint64_t flags = 0; // TODO: emit proper flags uint32_t terminalSize = - getULEB128Size(flags) + getULEB128Size(info->address); + getULEB128Size(info->flags) + getULEB128Size(info->address); buf += encodeULEB128(terminalSize, buf); - buf += encodeULEB128(flags, buf); + buf += encodeULEB128(info->flags, buf); buf += encodeULEB128(info->address, buf); } else { // TrieNode with no Symbol info. 
@@ -194,7 +207,7 @@ tailcall: if (isTerminal) { assert(j - i == 1); // no duplicate symbols - node->info = {pivotSymbol->getVA()}; + node->info = ExportInfo(*pivotSymbol, imageBase); } else { // This is the tail-call-optimized version of the following: // sortAndBuild(vec.slice(i, j - i), node, lastPos, pos + 1); diff --git a/contrib/llvm-project/lld/MachO/ExportTrie.h b/contrib/llvm-project/lld/MachO/ExportTrie.h index 2bd8c33db9a0..a43f4f2cce98 100644 --- a/contrib/llvm-project/lld/MachO/ExportTrie.h +++ b/contrib/llvm-project/lld/MachO/ExportTrie.h @@ -22,6 +22,7 @@ class Symbol; class TrieBuilder { public: + void setImageBase(uint64_t addr) { imageBase = addr; } void addSymbol(const Symbol &sym) { exported.push_back(&sym); } // Returns the size in bytes of the serialized trie. size_t build(); @@ -32,6 +33,7 @@ private: void sortAndBuild(llvm::MutableArrayRef<const Symbol *> vec, TrieNode *node, size_t lastPos, size_t pos); + uint64_t imageBase = 0; std::vector<const Symbol *> exported; std::vector<TrieNode *> nodes; }; diff --git a/contrib/llvm-project/lld/MachO/InputFiles.cpp b/contrib/llvm-project/lld/MachO/InputFiles.cpp index 46fe82f98822..3d4d98b51606 100644 --- a/contrib/llvm-project/lld/MachO/InputFiles.cpp +++ b/contrib/llvm-project/lld/MachO/InputFiles.cpp @@ -43,20 +43,29 @@ #include "InputFiles.h" #include "Config.h" +#include "Driver.h" +#include "Dwarf.h" #include "ExportTrie.h" #include "InputSection.h" #include "MachOStructs.h" +#include "ObjC.h" #include "OutputSection.h" +#include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" #include "Target.h" +#include "lld/Common/DWARF.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Reproduce.h" +#include "llvm/ADT/iterator.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/LTO/LTO.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TarWriter.h" using namespace 
llvm; using namespace llvm::MachO; @@ -65,7 +74,20 @@ using namespace llvm::sys; using namespace lld; using namespace lld::macho; -std::vector<InputFile *> macho::inputFiles; +// Returns "<internal>", "foo.a(bar.o)", or "baz.o". +std::string lld::toString(const InputFile *f) { + if (!f) + return "<internal>"; + if (f->archiveName.empty()) + return std::string(f->getName()); + return (path::filename(f->archiveName) + "(" + path::filename(f->getName()) + + ")") + .str(); +} + +SetVector<InputFile *> macho::inputFiles; +std::unique_ptr<TarWriter> macho::tar; +int InputFile::idCount = 0; // Open a given file path and return it as a memory-mapped file. Optional<MemoryBufferRef> macho::readFile(StringRef path) { @@ -83,8 +105,11 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) { // If this is a regular non-fat file, return it. const char *buf = mbref.getBufferStart(); auto *hdr = reinterpret_cast<const MachO::fat_header *>(buf); - if (read32be(&hdr->magic) != MachO::FAT_MAGIC) + if (read32be(&hdr->magic) != MachO::FAT_MAGIC) { + if (tar) + tar->append(relativeToRoot(path), mbref.getBuffer()); return mbref; + } // Object files and archive files may be fat files, which contains // multiple real files for different CPU ISAs. 
Here, we search for a @@ -107,6 +132,8 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) { uint32_t size = read32be(&arch[i].size); if (offset + size > mbref.getBufferSize()) error(path + ": slice extends beyond end of file"); + if (tar) + tar->append(relativeToRoot(path), mbref.getBuffer()); return MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc)); } @@ -114,7 +141,7 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) { return None; } -static const load_command *findCommand(const mach_header_64 *hdr, +const load_command *macho::findCommand(const mach_header_64 *hdr, uint32_t type) { const uint8_t *p = reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64); @@ -128,15 +155,17 @@ static const load_command *findCommand(const mach_header_64 *hdr, return nullptr; } -void InputFile::parseSections(ArrayRef<section_64> sections) { +void ObjFile::parseSections(ArrayRef<section_64> sections) { subsections.reserve(sections.size()); auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); for (const section_64 &sec : sections) { InputSection *isec = make<InputSection>(); isec->file = this; - isec->name = StringRef(sec.sectname, strnlen(sec.sectname, 16)); - isec->segname = StringRef(sec.segname, strnlen(sec.segname, 16)); + isec->name = + StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname))); + isec->segname = + StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname))); isec->data = {isZeroFill(sec.flags) ? 
nullptr : buf + sec.offset, static_cast<size_t>(sec.size)}; if (sec.align >= 32) @@ -145,7 +174,18 @@ void InputFile::parseSections(ArrayRef<section_64> sections) { else isec->align = 1 << sec.align; isec->flags = sec.flags; - subsections.push_back({{0, isec}}); + + if (!(isDebugSection(isec->flags) && + isec->segname == segment_names::dwarf)) { + subsections.push_back({{0, isec}}); + } else { + // Instead of emitting DWARF sections, we emit STABS symbols to the + // object files that contain them. We filter them out early to avoid + // parsing their relocations unnecessarily. But we must still push an + // empty map to ensure the indices line up for the remaining sections. + subsections.push_back({}); + debugSections.push_back(isec); + } } } @@ -163,96 +203,162 @@ static InputSection *findContainingSubsection(SubsectionMap &map, return it->second; } -void InputFile::parseRelocations(const section_64 &sec, - SubsectionMap &subsecMap) { +void ObjFile::parseRelocations(const section_64 &sec, + SubsectionMap &subsecMap) { auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); - ArrayRef<any_relocation_info> relInfos( - reinterpret_cast<const any_relocation_info *>(buf + sec.reloff), - sec.nreloc); - - for (const any_relocation_info &anyRel : relInfos) { - if (anyRel.r_word0 & R_SCATTERED) + ArrayRef<relocation_info> relInfos( + reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc); + + for (size_t i = 0; i < relInfos.size(); i++) { + // Paired relocations serve as Mach-O's method for attaching a + // supplemental datum to a primary relocation record. ELF does not + // need them because the *_RELOC_RELA records contain the extra + // addend field, vs. *_RELOC_REL which omit the addend. + // + // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend, + // and the paired *_RELOC_UNSIGNED record holds the minuend. The + // datum for each is a symbolic address. The result is the runtime + // offset between two addresses. 
+ // + // The ARM64_RELOC_ADDEND record holds the addend, and the paired + // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the + // base symbolic address. + // + // Note: X86 does not use *_RELOC_ADDEND because it can embed an + // addend into the instruction stream. On X86, a relocatable address + // field always occupies an entire contiguous sequence of byte(s), + // so there is no need to merge opcode bits with address + // bits. Therefore, it's easy and convenient to store addends in the + // instruction-stream bytes that would otherwise contain zeroes. By + // contrast, RISC ISAs such as ARM64 mix opcode bits with with + // address bits so that bitwise arithmetic is necessary to extract + // and insert them. Storing addends in the instruction stream is + // possible, but inconvenient and more costly at link time. + + relocation_info pairedInfo = relInfos[i]; + relocation_info relInfo = + target->isPairedReloc(pairedInfo) ? relInfos[++i] : pairedInfo; + assert(i < relInfos.size()); + if (relInfo.r_address & R_SCATTERED) fatal("TODO: Scattered relocations not supported"); - auto rel = reinterpret_cast<const relocation_info &>(anyRel); - Reloc r; - r.type = rel.r_type; - r.pcrel = rel.r_pcrel; - r.length = rel.r_length; - uint64_t rawAddend = target->getImplicitAddend(mb, sec, rel); - - if (rel.r_extern) { - r.target = symbols[rel.r_symbolnum]; + r.type = relInfo.r_type; + r.pcrel = relInfo.r_pcrel; + r.length = relInfo.r_length; + r.offset = relInfo.r_address; + // For unpaired relocs, pairdInfo (just a copy of relInfo) is ignored + uint64_t rawAddend = target->getAddend(mb, sec, relInfo, pairedInfo); + if (relInfo.r_extern) { + r.referent = symbols[relInfo.r_symbolnum]; r.addend = rawAddend; } else { - if (rel.r_symbolnum == 0 || rel.r_symbolnum > subsections.size()) - fatal("invalid section index in relocation for offset " + - std::to_string(r.offset) + " in section " + sec.sectname + - " of " + getName()); - - SubsectionMap &targetSubsecMap = 
subsections[rel.r_symbolnum - 1]; - const section_64 &targetSec = sectionHeaders[rel.r_symbolnum - 1]; - uint32_t targetOffset; - if (rel.r_pcrel) { + SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1]; + const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1]; + uint32_t referentOffset; + if (relInfo.r_pcrel) { // The implicit addend for pcrel section relocations is the pcrel offset // in terms of the addresses in the input file. Here we adjust it so - // that it describes the offset from the start of the target section. + // that it describes the offset from the start of the referent section. // TODO: The offset of 4 is probably not right for ARM64, nor for // relocations with r_length != 2. - targetOffset = - sec.addr + rel.r_address + 4 + rawAddend - targetSec.addr; + referentOffset = + sec.addr + relInfo.r_address + 4 + rawAddend - referentSec.addr; } else { // The addend for a non-pcrel relocation is its absolute address. - targetOffset = rawAddend - targetSec.addr; + referentOffset = rawAddend - referentSec.addr; } - r.target = findContainingSubsection(targetSubsecMap, &targetOffset); - r.addend = targetOffset; + r.referent = findContainingSubsection(referentSubsecMap, &referentOffset); + r.addend = referentOffset; } - r.offset = rel.r_address; InputSection *subsec = findContainingSubsection(subsecMap, &r.offset); subsec->relocs.push_back(r); } } -void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList, - const char *strtab, bool subsectionsViaSymbols) { +static macho::Symbol *createDefined(const structs::nlist_64 &sym, + StringRef name, InputSection *isec, + uint32_t value) { + // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT): + // N_EXT: Global symbols + // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped + // N_PEXT: Does not occur in input files in practice, + // a private extern must be external. + // 0: Translation-unit scoped. These are not in the symbol table. 
+ + if (sym.n_type & (N_EXT | N_PEXT)) { + assert((sym.n_type & N_EXT) && "invalid input"); + return symtab->addDefined(name, isec, value, sym.n_desc & N_WEAK_DEF, + sym.n_type & N_PEXT); + } + return make<Defined>(name, isec, value, sym.n_desc & N_WEAK_DEF, + /*isExternal=*/false, /*isPrivateExtern=*/false); +} + +// Absolute symbols are defined symbols that do not have an associated +// InputSection. They cannot be weak. +static macho::Symbol *createAbsolute(const structs::nlist_64 &sym, + StringRef name) { + if (sym.n_type & (N_EXT | N_PEXT)) { + assert((sym.n_type & N_EXT) && "invalid input"); + return symtab->addDefined(name, nullptr, sym.n_value, /*isWeakDef=*/false, + sym.n_type & N_PEXT); + } + return make<Defined>(name, nullptr, sym.n_value, /*isWeakDef=*/false, + /*isExternal=*/false, /*isPrivateExtern=*/false); +} + +macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym, + StringRef name) { + uint8_t type = sym.n_type & N_TYPE; + switch (type) { + case N_UNDF: + return sym.n_value == 0 + ? symtab->addUndefined(name, sym.n_desc & N_WEAK_REF) + : symtab->addCommon(name, this, sym.n_value, + 1 << GET_COMM_ALIGN(sym.n_desc), + sym.n_type & N_PEXT); + case N_ABS: + return createAbsolute(sym, name); + case N_PBUD: + case N_INDR: + error("TODO: support symbols of type " + std::to_string(type)); + return nullptr; + case N_SECT: + llvm_unreachable( + "N_SECT symbols should not be passed to parseNonSectionSymbol"); + default: + llvm_unreachable("invalid symbol type"); + } +} + +void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList, + const char *strtab, bool subsectionsViaSymbols) { // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols // out-of-sequence. 
symbols.resize(nList.size()); std::vector<size_t> altEntrySymIdxs; - auto createDefined = [&](const structs::nlist_64 &sym, InputSection *isec, - uint32_t value) -> Symbol * { - StringRef name = strtab + sym.n_strx; - if (sym.n_type & N_EXT) - // Global defined symbol - return symtab->addDefined(name, isec, value); - else - // Local defined symbol - return make<Defined>(name, isec, value); - }; - for (size_t i = 0, n = nList.size(); i < n; ++i) { const structs::nlist_64 &sym = nList[i]; + StringRef name = strtab + sym.n_strx; - // Undefined symbol - if (!sym.n_sect) { - StringRef name = strtab + sym.n_strx; - symbols[i] = symtab->addUndefined(name); + if ((sym.n_type & N_TYPE) != N_SECT) { + symbols[i] = parseNonSectionSymbol(sym, name); continue; } const section_64 &sec = sectionHeaders[sym.n_sect - 1]; SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; + assert(!subsecMap.empty()); uint64_t offset = sym.n_value - sec.addr; // If the input file does not use subsections-via-symbols, all symbols can // use the same subsection. Otherwise, we must split the sections along // symbol boundaries. if (!subsectionsViaSymbols) { - symbols[i] = createDefined(sym, subsecMap[0], offset); + symbols[i] = createDefined(sym, name, subsecMap[0], offset); continue; } @@ -274,7 +380,7 @@ void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList, if (firstSize == 0) { // Alias of an existing symbol, or the first symbol in the section. These // are handled by reusing the existing section. - symbols[i] = createDefined(sym, firstIsec, 0); + symbols[i] = createDefined(sym, name, firstIsec, 0); continue; } @@ -290,22 +396,45 @@ void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList, subsecMap[offset] = secondIsec; // By construction, the symbol will be at offset zero in the new section. 
- symbols[i] = createDefined(sym, secondIsec, 0); + symbols[i] = createDefined(sym, name, secondIsec, 0); } for (size_t idx : altEntrySymIdxs) { const structs::nlist_64 &sym = nList[idx]; + StringRef name = strtab + sym.n_strx; SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr; InputSection *subsec = findContainingSubsection(subsecMap, &off); - symbols[idx] = createDefined(sym, subsec, off); + symbols[idx] = createDefined(sym, name, subsec, off); } } -ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) { +OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, + StringRef sectName) + : InputFile(OpaqueKind, mb) { + InputSection *isec = make<InputSection>(); + isec->file = this; + isec->name = sectName.take_front(16); + isec->segname = segName.take_front(16); + const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); + isec->data = {buf, mb.getBufferSize()}; + subsections.push_back({{0, isec}}); +} + +ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName) + : InputFile(ObjKind, mb), modTime(modTime) { + this->archiveName = std::string(archiveName); + auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart()); + if (const load_command *cmd = findCommand(hdr, LC_LINKER_OPTION)) { + auto *c = reinterpret_cast<const linker_option_command *>(cmd); + StringRef data{reinterpret_cast<const char *>(c + 1), + c->cmdsize - sizeof(linker_option_command)}; + parseLCLinkerOption(this, c->count, data); + } + if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) { auto *c = reinterpret_cast<const segment_command_64 *>(cmd); sectionHeaders = ArrayRef<section_64>{ @@ -326,11 +455,112 @@ ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) { // The relocations may refer to the symbols, so we parse them after we have // parsed all the symbols. 
for (size_t i = 0, n = subsections.size(); i < n; ++i) - parseRelocations(sectionHeaders[i], subsections[i]); + if (!subsections[i].empty()) + parseRelocations(sectionHeaders[i], subsections[i]); + + parseDebugInfo(); +} + +void ObjFile::parseDebugInfo() { + std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this); + if (!dObj) + return; + + auto *ctx = make<DWARFContext>( + std::move(dObj), "", + [&](Error err) { + warn(toString(this) + ": " + toString(std::move(err))); + }, + [&](Error warning) { + warn(toString(this) + ": " + toString(std::move(warning))); + }); + + // TODO: Since object files can contain a lot of DWARF info, we should verify + // that we are parsing just the info we need + const DWARFContext::compile_unit_range &units = ctx->compile_units(); + auto it = units.begin(); + compileUnit = it->get(); + assert(std::next(it) == units.end()); +} + +// The path can point to either a dylib or a .tbd file. +static Optional<DylibFile *> loadDylib(StringRef path, DylibFile *umbrella) { + Optional<MemoryBufferRef> mbref = readFile(path); + if (!mbref) { + error("could not read dylib file at " + path); + return {}; + } + return loadDylib(*mbref, umbrella); +} + +// TBD files are parsed into a series of TAPI documents (InterfaceFiles), with +// the first document storing child pointers to the rest of them. When we are +// processing a given TBD file, we store that top-level document here. When +// processing re-exports, we search its children for potentially matching +// documents in the same TBD file. Note that the children themselves don't +// point to further documents, i.e. this is a two-level tree. +// +// ld64 allows a TAPI re-export to reference documents nested within other TBD +// files, but that seems like a strange design, so this is an intentional +// deviation. +const InterfaceFile *currentTopLevelTapi = nullptr; + +// Re-exports can either refer to on-disk files, or to documents within .tbd +// files. 
+static Optional<DylibFile *> loadReexportHelper(StringRef path, + DylibFile *umbrella) { + if (path::is_absolute(path, path::Style::posix)) + for (StringRef root : config->systemLibraryRoots) + if (Optional<std::string> dylibPath = + resolveDylibPath((root + path).str())) + return loadDylib(*dylibPath, umbrella); + + // TODO: Expand @loader_path, @executable_path etc + + if (currentTopLevelTapi) { + for (InterfaceFile &child : + make_pointee_range(currentTopLevelTapi->documents())) { + if (path == child.getInstallName()) + return make<DylibFile>(child, umbrella); + assert(child.documents().empty()); + } + } + + if (Optional<std::string> dylibPath = resolveDylibPath(path)) + return loadDylib(*dylibPath, umbrella); + + error("unable to locate re-export with install name " + path); + return {}; +} + +// If a re-exported dylib is public (lives in /usr/lib or +// /System/Library/Frameworks), then it is considered implicitly linked: we +// should bind to its symbols directly instead of via the re-exporting umbrella +// library. +static bool isImplicitlyLinked(StringRef path) { + if (!config->implicitDylibs) + return false; + + if (path::parent_path(path) == "/usr/lib") + return true; + + // Match /System/Library/Frameworks/$FOO.framework/**/$FOO + if (path.consume_front("/System/Library/Frameworks/")) { + StringRef frameworkName = path.take_until([](char c) { return c == '.'; }); + return path::filename(path) == frameworkName; + } + + return false; +} + +void loadReexport(StringRef path, DylibFile *umbrella) { + Optional<DylibFile *> reexport = loadReexportHelper(path, umbrella); + if (reexport && isImplicitlyLinked(path)) + inputFiles.insert(*reexport); } DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella) - : InputFile(DylibKind, mb) { + : InputFile(DylibKind, mb), refState(RefState::Unreferenced) { if (umbrella == nullptr) umbrella = this; @@ -340,21 +570,27 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella) // Initialize dylibName. 
if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) { auto *c = reinterpret_cast<const dylib_command *>(cmd); + currentVersion = read32le(&c->dylib.current_version); + compatibilityVersion = read32le(&c->dylib.compatibility_version); dylibName = reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name); } else { - error("dylib " + getName() + " missing LC_ID_DYLIB load command"); + error("dylib " + toString(this) + " missing LC_ID_DYLIB load command"); return; } // Initialize symbols. + DylibFile *exportingFile = isImplicitlyLinked(dylibName) ? this : umbrella; if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) { auto *c = reinterpret_cast<const dyld_info_command *>(cmd); parseTrie(buf + c->export_off, c->export_size, [&](const Twine &name, uint64_t flags) { - symbols.push_back(symtab->addDylib(saver.save(name), umbrella)); + bool isWeakDef = flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + bool isTlv = flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL; + symbols.push_back(symtab->addDylib( + saver.save(name), exportingFile, isWeakDef, isTlv)); }); } else { - error("LC_DYLD_INFO_ONLY not found in " + getName()); + error("LC_DYLD_INFO_ONLY not found in " + toString(this)); return; } @@ -372,36 +608,63 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella) auto *c = reinterpret_cast<const dylib_command *>(cmd); StringRef reexportPath = reinterpret_cast<const char *>(c) + read32le(&c->dylib.name); - // TODO: Expand @loader_path, @executable_path etc in reexportPath - Optional<MemoryBufferRef> buffer = readFile(reexportPath); - if (!buffer) { - error("unable to read re-exported dylib at " + reexportPath); - return; - } - reexported.push_back(make<DylibFile>(*buffer, umbrella)); + loadReexport(reexportPath, umbrella); } } -DylibFile::DylibFile(std::shared_ptr<llvm::MachO::InterfaceFile> interface, - DylibFile *umbrella) - : InputFile(DylibKind, MemoryBufferRef()) { +DylibFile::DylibFile(const InterfaceFile &interface, DylibFile 
*umbrella) + : InputFile(DylibKind, interface), refState(RefState::Unreferenced) { if (umbrella == nullptr) umbrella = this; - dylibName = saver.save(interface->getInstallName()); + dylibName = saver.save(interface.getInstallName()); + compatibilityVersion = interface.getCompatibilityVersion().rawValue(); + currentVersion = interface.getCurrentVersion().rawValue(); + DylibFile *exportingFile = isImplicitlyLinked(dylibName) ? this : umbrella; + auto addSymbol = [&](const Twine &name) -> void { + symbols.push_back(symtab->addDylib(saver.save(name), exportingFile, + /*isWeakDef=*/false, + /*isTlv=*/false)); + }; // TODO(compnerd) filter out symbols based on the target platform - for (const auto symbol : interface->symbols()) - if (symbol->getArchitectures().has(config->arch)) - symbols.push_back( - symtab->addDylib(saver.save(symbol->getName()), umbrella)); - // TODO(compnerd) properly represent the hierarchy of the documents as it is - // in theory possible to have re-exported dylibs from re-exported dylibs which - // should be parent'ed to the child. - for (auto document : interface->documents()) - reexported.push_back(make<DylibFile>(document, umbrella)); + // TODO: handle weak defs, thread locals + for (const auto symbol : interface.symbols()) { + if (!symbol->getArchitectures().has(config->arch)) + continue; + + switch (symbol->getKind()) { + case SymbolKind::GlobalSymbol: + addSymbol(symbol->getName()); + break; + case SymbolKind::ObjectiveCClass: + // XXX ld64 only creates these symbols when -ObjC is passed in. We may + // want to emulate that. 
+ addSymbol(objc::klass + symbol->getName()); + addSymbol(objc::metaclass + symbol->getName()); + break; + case SymbolKind::ObjectiveCClassEHType: + addSymbol(objc::ehtype + symbol->getName()); + break; + case SymbolKind::ObjectiveCInstanceVariable: + addSymbol(objc::ivar + symbol->getName()); + break; + } + } + + bool isTopLevelTapi = false; + if (currentTopLevelTapi == nullptr) { + currentTopLevelTapi = &interface; + isTopLevelTapi = true; + } + + for (InterfaceFileRef intfRef : interface.reexportedLibraries()) + loadReexport(intfRef.getInstallName(), umbrella); + + if (isTopLevelTapi) + currentTopLevelTapi = nullptr; } -ArchiveFile::ArchiveFile(std::unique_ptr<llvm::object::Archive> &&f) +ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f) : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) { for (const object::Archive::Symbol &sym : file->symbols()) symtab->addLazy(sym.getName(), this, sym); @@ -411,7 +674,7 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) { object::Archive::Child c = CHECK(sym.getMember(), toString(this) + ": could not get the member for symbol " + - sym.getName()); + toMachOString(sym)); if (!seen.insert(c.getChildOffset()).second) return; @@ -420,14 +683,45 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) { CHECK(c.getMemoryBufferRef(), toString(this) + ": could not get the buffer for the member defining symbol " + - sym.getName()); - auto file = make<ObjFile>(mb); - symbols.insert(symbols.end(), file->symbols.begin(), file->symbols.end()); - subsections.insert(subsections.end(), file->subsections.begin(), - file->subsections.end()); + toMachOString(sym)); + + if (tar && c.getParent()->isThin()) + tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer()); + + uint32_t modTime = toTimeT( + CHECK(c.getLastModified(), toString(this) + + ": could not get the modification time " + "for the member defining symbol " + + toMachOString(sym))); + + // `sym` is owned by a LazySym, 
which will be replace<>() by make<ObjFile> + // and become invalid after that call. Copy it to the stack so we can refer + // to it later. + const object::Archive::Symbol sym_copy = sym; + + InputFile *file; + switch (identify_magic(mb.getBuffer())) { + case file_magic::macho_object: + file = make<ObjFile>(mb, modTime, getName()); + break; + case file_magic::bitcode: + file = make<BitcodeFile>(mb); + break; + default: + StringRef bufname = + CHECK(c.getName(), toString(this) + ": could not get buffer name"); + error(toString(this) + ": archive member " + bufname + + " has unhandled file type"); + return; + } + inputFiles.insert(file); + + // ld64 doesn't demangle sym here even with -demangle. Match that, so + // intentionally no call to toMachOString() here. + printArchiveMemberLoad(sym_copy.getName(), file); } -// Returns "<internal>" or "baz.o". -std::string lld::toString(const InputFile *file) { - return file ? std::string(file->getName()) : "<internal>"; +BitcodeFile::BitcodeFile(MemoryBufferRef mbref) + : InputFile(BitcodeKind, mbref) { + obj = check(lto::InputFile::create(mbref)); } diff --git a/contrib/llvm-project/lld/MachO/InputFiles.h b/contrib/llvm-project/lld/MachO/InputFiles.h index bc5ad86ccaa9..ef573145f594 100644 --- a/contrib/llvm-project/lld/MachO/InputFiles.h +++ b/contrib/llvm-project/lld/MachO/InputFiles.h @@ -12,8 +12,11 @@ #include "MachOStructs.h" #include "lld/Common/LLVM.h" +#include "lld/Common/Memory.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Object/Archive.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/TextAPI/MachO/InterfaceFile.h" @@ -22,12 +25,24 @@ #include <map> #include <vector> +namespace llvm { +namespace lto { +class InputFile; +} // namespace lto +class TarWriter; +} // namespace llvm + namespace lld { namespace macho { class InputSection; class Symbol; struct Reloc; +enum class RefState : uint8_t; 
+ +// If --reproduce option is given, all input files are written +// to this tar archive. +extern std::unique_ptr<llvm::TarWriter> tar; // If .subsections_via_symbols is set, each InputSection will be split along // symbol boundaries. The keys of a SubsectionMap represent the offsets of @@ -38,46 +53,71 @@ class InputFile { public: enum Kind { ObjKind, + OpaqueKind, DylibKind, ArchiveKind, + BitcodeKind, }; virtual ~InputFile() = default; Kind kind() const { return fileKind; } - StringRef getName() const { return mb.getBufferIdentifier(); } + StringRef getName() const { return name; } MemoryBufferRef mb; + std::vector<Symbol *> symbols; - ArrayRef<llvm::MachO::section_64> sectionHeaders; std::vector<SubsectionMap> subsections; + // Provides an easy way to sort InputFiles deterministically. + const int id; -protected: - InputFile(Kind kind, MemoryBufferRef mb) : mb(mb), fileKind(kind) {} - - void parseSections(ArrayRef<llvm::MachO::section_64>); + // If not empty, this stores the name of the archive containing this file. + // We use this string for creating error messages. 
+ std::string archiveName; - void parseSymbols(ArrayRef<lld::structs::nlist_64> nList, const char *strtab, - bool subsectionsViaSymbols); +protected: + InputFile(Kind kind, MemoryBufferRef mb) + : mb(mb), id(idCount++), fileKind(kind), name(mb.getBufferIdentifier()) {} - void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &); + InputFile(Kind kind, const llvm::MachO::InterfaceFile &interface) + : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {} private: const Kind fileKind; + const StringRef name; + + static int idCount; }; // .o file class ObjFile : public InputFile { public: - explicit ObjFile(MemoryBufferRef mb); + ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName); static bool classof(const InputFile *f) { return f->kind() == ObjKind; } + + llvm::DWARFUnit *compileUnit = nullptr; + const uint32_t modTime; + ArrayRef<llvm::MachO::section_64> sectionHeaders; + std::vector<InputSection *> debugSections; + +private: + void parseSections(ArrayRef<llvm::MachO::section_64>); + void parseSymbols(ArrayRef<lld::structs::nlist_64> nList, const char *strtab, + bool subsectionsViaSymbols); + Symbol *parseNonSectionSymbol(const structs::nlist_64 &sym, StringRef name); + void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &); + void parseDebugInfo(); +}; + +// command-line -sectcreate file +class OpaqueFile : public InputFile { +public: + OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName); + static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; } }; // .dylib file class DylibFile : public InputFile { public: - explicit DylibFile(std::shared_ptr<llvm::MachO::InterfaceFile> interface, - DylibFile *umbrella = nullptr); - // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the // symbols in those sub-libraries will be available under the umbrella // library's namespace. 
Those sub-libraries can also have their own @@ -87,12 +127,18 @@ public: // (through an -lfoo flag), then `umbrella` should be a nullptr. explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella = nullptr); + explicit DylibFile(const llvm::MachO::InterfaceFile &interface, + DylibFile *umbrella = nullptr); + static bool classof(const InputFile *f) { return f->kind() == DylibKind; } StringRef dylibName; + uint32_t compatibilityVersion = 0; + uint32_t currentVersion = 0; uint64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel + RefState refState; bool reexport = false; - std::vector<DylibFile *> reexported; + bool forceWeakImport = false; }; // .a file @@ -109,10 +155,21 @@ private: llvm::DenseSet<uint64_t> seen; }; -extern std::vector<InputFile *> inputFiles; +class BitcodeFile : public InputFile { +public: + explicit BitcodeFile(MemoryBufferRef mb); + static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } + + std::unique_ptr<llvm::lto::InputFile> obj; +}; + +extern llvm::SetVector<InputFile *> inputFiles; llvm::Optional<MemoryBufferRef> readFile(StringRef path); +const llvm::MachO::load_command * +findCommand(const llvm::MachO::mach_header_64 *, uint32_t type); + } // namespace macho std::string toString(const macho::InputFile *file); diff --git a/contrib/llvm-project/lld/MachO/InputSection.cpp b/contrib/llvm-project/lld/MachO/InputSection.cpp index 72d489283051..9287d49dae3c 100644 --- a/contrib/llvm-project/lld/MachO/InputSection.cpp +++ b/contrib/llvm-project/lld/MachO/InputSection.cpp @@ -7,9 +7,11 @@ //===----------------------------------------------------------------------===// #include "InputSection.h" +#include "InputFiles.h" #include "OutputSegment.h" #include "Symbols.h" #include "Target.h" +#include "Writer.h" #include "lld/Common/Memory.h" #include "llvm/Support/Endian.h" @@ -25,6 +27,10 @@ uint64_t InputSection::getFileOffset() const { return parent->fileOff + outSecFileOff; } +uint64_t 
InputSection::getFileSize() const { + return isZeroFill(flags) ? 0 : getSize(); +} + uint64_t InputSection::getVA() const { return parent->addr + outSecOff; } void InputSection::writeTo(uint8_t *buf) { @@ -34,15 +40,47 @@ void InputSection::writeTo(uint8_t *buf) { memcpy(buf, data.data(), data.size()); for (Reloc &r : relocs) { - uint64_t va = 0; - if (auto *s = r.target.dyn_cast<Symbol *>()) - va = target->getSymbolVA(*s, r.type); - else if (auto *isec = r.target.dyn_cast<InputSection *>()) - va = isec->getVA(); + uint64_t referentVA = 0; + if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { + referentVA = + target->resolveSymbolVA(buf + r.offset, *referentSym, r.type); + + if (isThreadLocalVariables(flags)) { + // References from thread-local variable sections are treated as offsets + // relative to the start of the thread-local data memory area, which + // is initialized via copying all the TLV data sections (which are all + // contiguous). + if (isa<Defined>(referentSym)) + referentVA -= firstTLVDataSection->addr; + } + } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { + referentVA = referentIsec->getVA(); + } - uint64_t val = va + r.addend; + uint64_t referentVal = referentVA + r.addend; if (r.pcrel) - val -= getVA() + r.offset; - target->relocateOne(buf + r.offset, r, val); + referentVal -= getVA() + r.offset; + target->relocateOne(buf + r.offset, r, referentVal); } } + +bool macho::isCodeSection(InputSection *isec) { + uint32_t type = isec->flags & MachO::SECTION_TYPE; + if (type != S_REGULAR && type != S_COALESCED) + return false; + + uint32_t attr = isec->flags & MachO::SECTION_ATTRIBUTES_USR; + if (attr == S_ATTR_PURE_INSTRUCTIONS) + return true; + + if (isec->segname == segment_names::text) + return StringSwitch<bool>(isec->name) + .Cases("__textcoal_nt", "__StaticInit", true) + .Default(false); + + return false; +} + +std::string lld::toString(const InputSection *isec) { + return (toString(isec->file) + ":(" + isec->name + 
")").str(); +} diff --git a/contrib/llvm-project/lld/MachO/InputSection.h b/contrib/llvm-project/lld/MachO/InputSection.h index 96ae0cbe6ea4..00b523fb8d46 100644 --- a/contrib/llvm-project/lld/MachO/InputSection.h +++ b/contrib/llvm-project/lld/MachO/InputSection.h @@ -29,23 +29,17 @@ struct Reloc { // The offset from the start of the subsection that this relocation belongs // to. uint32_t offset; - // Adding this offset to the address of the target symbol or subsection gives - // the destination that this relocation refers to. + // Adding this offset to the address of the referent symbol or subsection + // gives the destination that this relocation refers to. uint64_t addend; - llvm::PointerUnion<Symbol *, InputSection *> target; + llvm::PointerUnion<Symbol *, InputSection *> referent; }; -inline bool isZeroFill(uint8_t flags) { - return (flags & llvm::MachO::SECTION_TYPE) == llvm::MachO::S_ZEROFILL; -} - class InputSection { public: virtual ~InputSection() = default; virtual uint64_t getSize() const { return data.size(); } - virtual uint64_t getFileSize() const { - return isZeroFill(flags) ? 0 : getSize(); - } + virtual uint64_t getFileSize() const; uint64_t getFileOffset() const; uint64_t getVA() const; @@ -66,9 +60,37 @@ public: std::vector<Reloc> relocs; }; +inline uint8_t sectionType(uint32_t flags) { + return flags & llvm::MachO::SECTION_TYPE; +} + +inline bool isZeroFill(uint32_t flags) { + return llvm::MachO::isVirtualSection(sectionType(flags)); +} + +inline bool isThreadLocalVariables(uint32_t flags) { + return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES; +} + +// These sections contain the data for initializing thread-local variables. 
+inline bool isThreadLocalData(uint32_t flags) { + return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR || + sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL; +} + +inline bool isDebugSection(uint32_t flags) { + return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) == + llvm::MachO::S_ATTR_DEBUG; +} + +bool isCodeSection(InputSection *); + extern std::vector<InputSection *> inputSections; } // namespace macho + +std::string toString(const macho::InputSection *); + } // namespace lld #endif diff --git a/contrib/llvm-project/lld/MachO/LTO.cpp b/contrib/llvm-project/lld/MachO/LTO.cpp new file mode 100644 index 000000000000..f48bc24df3d7 --- /dev/null +++ b/contrib/llvm-project/lld/MachO/LTO.cpp @@ -0,0 +1,110 @@ +//===- LTO.cpp ------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LTO.h" +#include "Config.h" +#include "Driver.h" +#include "InputFiles.h" + +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Strings.h" +#include "lld/Common/TargetOptionsCommandFlags.h" +#include "llvm/LTO/LTO.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/ObjCARC.h" + +using namespace lld; +using namespace lld::macho; +using namespace llvm; +using namespace llvm::sys; + +static lto::Config createConfig() { + lto::Config c; + c.Options = initTargetOptionsFromCodeGenFlags(); + c.CodeModel = getCodeModelFromCMModel(); + c.CPU = getCPUStr(); + c.MAttrs = getMAttrs(); + c.UseNewPM = config->ltoNewPassManager; + c.PreCodeGenPassesHook = [](legacy::PassManager &pm) { + pm.add(createObjCARCContractPass()); + }; + return c; +} + +BitcodeCompiler::BitcodeCompiler() 
{ + auto backend = + lto::createInProcessThinBackend(heavyweight_hardware_concurrency()); + ltoObj = std::make_unique<lto::LTO>(createConfig(), backend); +} + +void BitcodeCompiler::add(BitcodeFile &f) { + ArrayRef<lto::InputFile::Symbol> objSyms = f.obj->symbols(); + std::vector<lto::SymbolResolution> resols; + resols.reserve(objSyms.size()); + + // Provide a resolution to the LTO API for each symbol. + for (const lto::InputFile::Symbol &objSym : objSyms) { + resols.emplace_back(); + lto::SymbolResolution &r = resols.back(); + + // Ideally we shouldn't check for SF_Undefined but currently IRObjectFile + // reports two symbols for module ASM defined. Without this check, lld + // flags an undefined in IR with a definition in ASM as prevailing. + // Once IRObjectFile is fixed to report only one symbol this hack can + // be removed. + r.Prevailing = !objSym.isUndefined(); + + // TODO: set the other resolution configs properly + r.VisibleToRegularObj = true; + } + checkError(ltoObj->add(std::move(f.obj), resols)); +} + +// Merge all the bitcode files we have seen, codegen the result +// and return the resulting ObjectFile(s). 
+std::vector<ObjFile *> BitcodeCompiler::compile() { + unsigned maxTasks = ltoObj->getMaxTasks(); + buf.resize(maxTasks); + + checkError(ltoObj->run([&](size_t task) { + return std::make_unique<lto::NativeObjectStream>( + std::make_unique<raw_svector_ostream>(buf[task])); + })); + + if (config->saveTemps) { + if (!buf[0].empty()) + saveBuffer(buf[0], config->outputFile + ".lto.o"); + for (unsigned i = 1; i != maxTasks; ++i) + saveBuffer(buf[i], config->outputFile + Twine(i) + ".lto.o"); + } + + if (!config->ltoObjPath.empty()) + fs::create_directories(config->ltoObjPath); + + std::vector<ObjFile *> ret; + for (unsigned i = 0; i != maxTasks; ++i) { + if (buf[i].empty()) { + continue; + } + SmallString<261> filePath("/tmp/lto.tmp"); + uint32_t modTime = 0; + if (!config->ltoObjPath.empty()) { + filePath = config->ltoObjPath; + path::append(filePath, Twine(i) + "." + + getArchitectureName(config->arch) + ".lto.o"); + saveBuffer(buf[i], filePath); + modTime = getModTime(filePath); + } + ret.push_back(make<ObjFile>( + MemoryBufferRef(buf[i], saver.save(filePath.str())), modTime, "")); + } + + return ret; +} diff --git a/contrib/llvm-project/lld/MachO/LTO.h b/contrib/llvm-project/lld/MachO/LTO.h new file mode 100644 index 000000000000..2577374590b7 --- /dev/null +++ b/contrib/llvm-project/lld/MachO/LTO.h @@ -0,0 +1,43 @@ +//===- LTO.h ----------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_LTO_H +#define LLD_MACHO_LTO_H + +#include "llvm/ADT/SmallString.h" +#include <memory> +#include <vector> + +namespace llvm { +namespace lto { +class LTO; +} // namespace lto +} // namespace llvm + +namespace lld { +namespace macho { + +class BitcodeFile; +class ObjFile; + +class BitcodeCompiler { +public: + BitcodeCompiler(); + + void add(BitcodeFile &f); + std::vector<ObjFile *> compile(); + +private: + std::unique_ptr<llvm::lto::LTO> ltoObj; + std::vector<llvm::SmallString<0>> buf; +}; + +} // namespace macho +} // namespace lld + +#endif diff --git a/contrib/llvm-project/lld/MachO/ObjC.cpp b/contrib/llvm-project/lld/MachO/ObjC.cpp new file mode 100644 index 000000000000..21691ef5255b --- /dev/null +++ b/contrib/llvm-project/lld/MachO/ObjC.cpp @@ -0,0 +1,36 @@ +//===- ObjC.cpp -----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ObjC.h" +#include "InputFiles.h" +#include "OutputSegment.h" + +#include "llvm/BinaryFormat/MachO.h" + +using namespace llvm; +using namespace llvm::MachO; +using namespace lld; + +bool macho::hasObjCSection(MemoryBufferRef mb) { + auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart()); + if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) { + auto *c = reinterpret_cast<const segment_command_64 *>(cmd); + auto sectionHeaders = ArrayRef<section_64>{ + reinterpret_cast<const section_64 *>(c + 1), c->nsects}; + for (const section_64 &sec : sectionHeaders) { + StringRef sectname(sec.sectname, + strnlen(sec.sectname, sizeof(sec.sectname))); + StringRef segname(sec.segname, strnlen(sec.segname, sizeof(sec.segname))); + if ((segname == segment_names::data && sectname == "__objc_catlist") || + (segname == segment_names::text && sectname == "__swift")) { + return true; + } + } + } + return false; +} diff --git a/contrib/llvm-project/lld/MachO/ObjC.h b/contrib/llvm-project/lld/MachO/ObjC.h new file mode 100644 index 000000000000..8db459ad8e2b --- /dev/null +++ b/contrib/llvm-project/lld/MachO/ObjC.h @@ -0,0 +1,31 @@ +//===- ObjC.h ---------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_OBJC_H +#define LLD_MACHO_OBJC_H + +#include "llvm/Support/MemoryBuffer.h" + +namespace lld { +namespace macho { + +namespace objc { + +constexpr const char klass[] = "_OBJC_CLASS_$_"; +constexpr const char metaclass[] = "_OBJC_METACLASS_$_"; +constexpr const char ehtype[] = "_OBJC_EHTYPE_$_"; +constexpr const char ivar[] = "_OBJC_IVAR_$_"; + +} // namespace objc + +bool hasObjCSection(llvm::MemoryBufferRef); + +} // namespace macho +} // namespace lld + +#endif diff --git a/contrib/llvm-project/lld/MachO/Options.td b/contrib/llvm-project/lld/MachO/Options.td index 1e42542b9ac4..51c834c57238 100644 --- a/contrib/llvm-project/lld/MachO/Options.td +++ b/contrib/llvm-project/lld/MachO/Options.td @@ -1,8 +1,41 @@ include "llvm/Option/OptParser.td" -def help : Flag<["-", "--"], "help">; +// Flags that lld/MachO understands but ld64 doesn't. These take +// '--' instead of '-' and use dashes instead of underscores, so +// they don't collide with the ld64 compat options. 
+def grp_lld : OptionGroup<"kind">, HelpText<"LLD-SPECIFIC">; + +def help : Flag<["-", "--"], "help">, + Group<grp_lld>; def help_hidden : Flag<["--"], "help-hidden">, - HelpText<"Display help for hidden options">; + HelpText<"Display help for hidden options">, + Group<grp_lld>; +def color_diagnostics: Flag<["--"], "color-diagnostics">, + HelpText<"Alias for --color-diagnostics=always">, + Group<grp_lld>; +def no_color_diagnostics: Flag<["--"], "no-color-diagnostics">, + HelpText<"Alias for --color-diagnostics=never">, + Group<grp_lld>; +def color_diagnostics_eq: Joined<["--"], "color-diagnostics=">, + HelpText<"Use colors in diagnostics (default: auto)">, + MetaVarName<"[auto,always,never]">, + Group<grp_lld>; +def reproduce: Separate<["--"], "reproduce">, + Group<grp_lld>; +def reproduce_eq: Joined<["--"], "reproduce=">, + Alias<!cast<Separate>(reproduce)>, + HelpText<"Write tar file containing inputs and command to reproduce link">, + Group<grp_lld>; +def version: Flag<["--"], "version">, + HelpText<"Display the version number and exit">, + Group<grp_lld>; +def lto_legacy_pass_manager: Flag<["--"], "lto-legacy-pass-manager">, + HelpText<"Use the legacy pass manager in LLVM">, + Group<grp_lld>; +def no_lto_legacy_pass_manager : Flag<["--"], "no-lto-legacy-pass-manager">, + HelpText<"Use the new pass manager in LLVM">, + Group<grp_lld>; + // This is a complete Options.td compiled from Apple's ld(1) manpage // dated 2018-03-07 and cross checked with ld64 source code in repo @@ -18,14 +51,12 @@ def grp_kind : OptionGroup<"kind">, HelpText<"OUTPUT KIND">; def execute : Flag<["-"], "execute">, HelpText<"Produce a main executable (default)">, - Flags<[HelpHidden]>, Group<grp_kind>; def dylib : Flag<["-"], "dylib">, HelpText<"Produce a shared library">, Group<grp_kind>; def bundle : Flag<["-"], "bundle">, HelpText<"Produce a bundle">, - Flags<[HelpHidden]>, Group<grp_kind>; def r : Flag<["-"], "r">, HelpText<"Merge multiple object files into one, retaining 
relocations">, @@ -37,7 +68,6 @@ def dylinker : Flag<["-"], "dylinker">, Group<grp_kind>; def dynamic : Flag<["-"], "dynamic">, HelpText<"Link dynamically (default)">, - Flags<[HelpHidden]>, Group<grp_kind>; def static : Flag<["-"], "static">, HelpText<"Link statically">, @@ -65,12 +95,10 @@ def l : Joined<["-"], "l">, def weak_l : Joined<["-"], "weak-l">, MetaVarName<"<name>">, HelpText<"Like -l<name>, but mark library and its references as weak imports">, - Flags<[HelpHidden]>, Group<grp_libs>; def weak_library : Separate<["-"], "weak_library">, MetaVarName<"<path>">, HelpText<"Like bare <path>, but mark library and its references as weak imports">, - Flags<[HelpHidden]>, Group<grp_libs>; def reexport_l : Joined<["-"], "reexport-l">, MetaVarName<"<name>">, @@ -102,25 +130,20 @@ def Z : Flag<["-"], "Z">, def syslibroot : Separate<["-"], "syslibroot">, MetaVarName<"<rootdir>">, HelpText<"Prepend <rootdir> to all library and framework search paths">, - Flags<[HelpHidden]>, Group<grp_libs>; def search_paths_first : Flag<["-"], "search_paths_first">, HelpText<"Search for lib<name>.dylib and lib<name>.a at each step in traversing search path (default for Xcode 4 and later)">, - Flags<[HelpHidden]>, Group<grp_libs>; def search_dylibs_first : Flag<["-"], "search_dylibs_first">, HelpText<"Search for lib<name>.dylib on first pass, then for lib<name>.a on second pass through search path (default for Xcode 3 and earlier)">, - Flags<[HelpHidden]>, Group<grp_libs>; def framework : Separate<["-"], "framework">, MetaVarName<"<name>">, HelpText<"Search for <name>.framework/<name> on the framework search path">, - Flags<[HelpHidden]>, Group<grp_libs>; def weak_framework : Separate<["-"], "weak_framework">, MetaVarName<"<name>">, HelpText<"Like -framework <name>, but mark framework and its references as weak imports">, - Flags<[HelpHidden]>, Group<grp_libs>; def reexport_framework : Separate<["-"], "reexport_framework">, MetaVarName<"<name>">, @@ -135,20 +158,16 @@ def 
upward_framework : Separate<["-"], "upward_framework">, def F : JoinedOrSeparate<["-"], "F">, MetaVarName<"<dir>">, HelpText<"Add dir to the framework search path">, - Flags<[HelpHidden]>, Group<grp_libs>; def all_load : Flag<["-"], "all_load">, HelpText<"Load all members of all static archive libraries">, - Flags<[HelpHidden]>, Group<grp_libs>; def ObjC : Flag<["-"], "ObjC">, HelpText<"Load all members of static archives that are an Objective-C class or category.">, - Flags<[HelpHidden]>, Group<grp_libs>; def force_load : Separate<["-"], "force_load">, MetaVarName<"<path>">, HelpText<"Load all members static archive library at <path>">, - Flags<[HelpHidden]>, Group<grp_libs>; def grp_content : OptionGroup<"content">, HelpText<"ADDITIONAL CONTENT">; @@ -156,7 +175,6 @@ def grp_content : OptionGroup<"content">, HelpText<"ADDITIONAL CONTENT">; def sectcreate : MultiArg<["-"], "sectcreate", 3>, MetaVarName<"<segment> <section> <file>">, HelpText<"Create <section> in <segment> from the contents of <file>">, - Flags<[HelpHidden]>, Group<grp_content>; def segcreate : MultiArg<["-"], "segcreate", 3>, MetaVarName<"<segment> <section> <file>">, @@ -167,7 +185,6 @@ def segcreate : MultiArg<["-"], "segcreate", 3>, def filelist : Separate<["-"], "filelist">, MetaVarName<"<file>">, HelpText<"Read names of files to link from <file>">, - Flags<[HelpHidden]>, Group<grp_content>; def dtrace : Separate<["-"], "dtrace">, MetaVarName<"<script>">, @@ -184,7 +201,6 @@ def dead_strip : Flag<["-"], "dead_strip">, def order_file : Separate<["-"], "order_file">, MetaVarName<"<file>">, HelpText<"Layout functions and data according to specification in <file>">, - Flags<[HelpHidden]>, Group<grp_opts>; def sectorder : MultiArg<["-"], "sectorder", 3>, MetaVarName<"<segname> <sectname> <orderfile>">, @@ -202,6 +218,9 @@ def no_order_data : Flag<["-"], "no_order_data">, def platform_version : MultiArg<["-"], "platform_version", 3>, MetaVarName<"<platform> <min_version> <sdk_version>">, 
HelpText<"Platform (e.g., macos, ios, tvos, watchos, bridgeos, mac-catalyst, ios-sim, tvos-sim, watchos-sim, driverkit) and version numbers">, + Group<grp_opts>; +def sdk_version : Separate<["-"], "sdk_version">, + HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, Group<grp_opts>; def macos_version_min : Separate<["-"], "macos_version_min">, @@ -220,6 +239,11 @@ def ios_version_min : Separate<["-"], "ios_version_min">, HelpText<"Oldest iOS version for which linked output is useable">, Flags<[HelpHidden]>, Group<grp_opts>; +def ios_simulator_version_min : Separate<["-"], "ios_simulator_version_min">, + MetaVarName<"<version>">, + HelpText<"Oldest iOS simulator version for which linked output is useable">, + Flags<[HelpHidden]>, + Group<grp_opts>; def iphoneos_version_min : Separate<["-"], "iphoneos_version_min">, MetaVarName<"<version>">, Alias<ios_version_min>, @@ -239,7 +263,6 @@ def seg1addr : Separate<["-"], "seg1addr">, Group<grp_opts>; def no_implicit_dylibs : Flag<["-"], "no_implicit_dylibs">, HelpText<"Do not optimize public dylib transitive symbol references">, - Flags<[HelpHidden]>, Group<grp_opts>; def exported_symbols_order : Separate<["-"], "exported_symbols_order">, MetaVarName<"<file>">, @@ -284,7 +307,6 @@ def mark_dead_strippable_dylib : Flag<["-"], "mark_dead_strippable_dylib">, def compatibility_version : Separate<["-"], "compatibility_version">, MetaVarName<"<version>">, HelpText<"Compatibility <version> of this library">, - Flags<[HelpHidden]>, Group<grp_dylib>; def dylib_compatibility_version : Separate<["-"], "dylib_compatibility_version">, MetaVarName<"<version>">, @@ -295,7 +317,6 @@ def dylib_compatibility_version : Separate<["-"], "dylib_compatibility_version"> def current_version : Separate<["-"], "current_version">, MetaVarName<"<version>">, HelpText<"Current <version> of this library">, - Flags<[HelpHidden]>, Group<grp_dylib>; def dylib_current_version : Separate<["-"], "dylib_current_version">, 
MetaVarName<"<version>">, @@ -308,11 +329,9 @@ def grp_main : OptionGroup<"main">, HelpText<"MAIN EXECUTABLE">; def pie : Flag<["-"], "pie">, HelpText<"Build a position independent executable (default for macOS 10.7 and later)">, - Flags<[HelpHidden]>, Group<grp_main>; def no_pie : Flag<["-"], "no_pie">, HelpText<"Do not build a position independent executable (default for macOS 10.6 and earlier)">, - Flags<[HelpHidden]>, Group<grp_main>; def pagezero_size : Separate<["-"], "pagezero_size">, MetaVarName<"<size>">, @@ -344,7 +363,7 @@ def bundle_loader : Separate<["-"], "bundle_loader">, def grp_object : OptionGroup<"object">, HelpText<"CREATING AN OBJECT FILE">; def keep_private_externs : Flag<["-"], "keep_private_externs">, - HelpText<"Do not convert private external symbols to static symbols">, + HelpText<"Do not convert private external symbols to static symbols (only valid with -r)">, Flags<[HelpHidden]>, Group<grp_object>; def d : Flag<["-"], "d">, @@ -406,12 +425,10 @@ def U : Separate<["-"], "U">, def undefined : Separate<["-"], "undefined">, MetaVarName<"<treatment>">, HelpText<"Handle undefined symbols according to <treatment>: error, warning, suppress, or dynamic_lookup (default is error)">, - Flags<[HelpHidden]>, Group<grp_resolve>; def rpath : Separate<["-"], "rpath">, MetaVarName<"<path>">, HelpText<"Add <path> to dyld search list for dylibs with load path prefix `@rpath/'">, - Flags<[HelpHidden]>, Group<grp_resolve>; def commons : Separate<["-"], "commons">, MetaVarName<"<treatment>">, @@ -422,8 +439,7 @@ def commons : Separate<["-"], "commons">, def grp_introspect : OptionGroup<"introspect">, HelpText<"INTROSPECTING THE LINKER">; def why_load : Flag<["-"], "why_load">, - HelpText<"Log the symbol that compels loading of each object file from a static library">, - Flags<[HelpHidden]>, + HelpText<"Log why each object file is loaded from a static library">, Group<grp_introspect>; def whyload : Flag<["-"], "whyload">, Alias<why_load>, @@ -441,7 +457,6 @@ 
def print_statistics : Flag<["-"], "print_statistics">, Group<grp_introspect>; def t : Flag<["-"], "t">, HelpText<"Log every file the linker loads: object, archive, and dylib">, - Flags<[HelpHidden]>, Group<grp_introspect>; def whatsloaded : Flag<["-"], "whatsloaded">, HelpText<"Logs only the object files the linker loads">, @@ -456,6 +471,14 @@ def map : Separate<["-"], "map">, HelpText<"Writes all symbols and their addresses to <path>">, Flags<[HelpHidden]>, Group<grp_introspect>; +def dependency_info : Separate<["-"], "dependency_info">, + MetaVarName<"<path>">, + HelpText<"Dump dependency info">, + Flags<[HelpHidden]>, + Group<grp_introspect>; +def save_temps : Flag<["-"], "save-temps">, + HelpText<"Save intermediate LTO compilation results">, + Group<grp_introspect>; def grp_symtab : OptionGroup<"symtab">, HelpText<"SYMBOL TABLE OPTIMIZATIONS">; @@ -502,9 +525,15 @@ def bitcode_symbol_map : Separate<["-"], "bitcode_symbol_map">, def grp_rare : OptionGroup<"rare">, HelpText<"RARELY USED">; def v : Flag<["-"], "v">, - HelpText<"Print the linker version">, + HelpText<"Print the linker version and search paths and exit">, + Group<grp_rare>; +def adhoc_codesign : Flag<["-"], "adhoc_codesign">, + HelpText<"Write an ad-hoc code signature to the output file.">, Flags<[HelpHidden]>, Group<grp_rare>; +def no_adhoc_codesign : Flag<["-"], "no_adhoc_codesign">, + HelpText<"Do not write an ad-hoc code signature to the output file.">, + Group<grp_rare>; def version_details : Flag<["-"], "version_details">, HelpText<"Print the linker version in JSON form">, Flags<[HelpHidden]>, @@ -595,8 +624,7 @@ def no_application_extension : Flag<["-"], "no_application_extension">, Flags<[HelpHidden]>, Group<grp_rare>; def fatal_warnings : Flag<["-"], "fatal_warnings">, - HelpText<"Escalate warnings as errors">, - Flags<[HelpHidden]>, + HelpText<"Treat warnings as errors">, Group<grp_rare>; def no_eh_labels : Flag<["-"], "no_eh_labels">, HelpText<"In -r mode, suppress .eh labels in the 
__eh_frame section">, @@ -663,12 +691,10 @@ def init : Separate<["-"], "init">, def sub_library : Separate<["-"], "sub_library">, MetaVarName<"<name>">, HelpText<"Re-export the dylib as <name>">, - Flags<[HelpHidden]>, Group<grp_rare>; def sub_umbrella : Separate<["-"], "sub_umbrella">, MetaVarName<"<name>">, HelpText<"Re-export the framework as <name>">, - Flags<[HelpHidden]>, Group<grp_rare>; def allowable_client : Separate<["-"], "allowable_client">, MetaVarName<"<name>">, @@ -687,12 +713,10 @@ def umbrella : Separate<["-"], "umbrella">, Group<grp_rare>; def headerpad : Separate<["-"], "headerpad">, MetaVarName<"<size>">, - HelpText<"Allocate hex <size> extra space for future expansion of the load commands via install_name_tool">, - Flags<[HelpHidden]>, + HelpText<"Allocate hex <size> extra space for future expansion of the load commands via install_name_tool (default is 0x20)">, Group<grp_rare>; def headerpad_max_install_names : Flag<["-"], "headerpad_max_install_names">, HelpText<"Allocate extra space so all load-command paths can expand to MAXPATHLEN via install_name_tool">, - Flags<[HelpHidden]>, Group<grp_rare>; def bind_at_load : Flag<["-"], "bind_at_load">, HelpText<"Tell dyld to bind all symbols at load time, rather than lazily">, @@ -835,12 +859,10 @@ def no_objc_category_merging : Flag<["-"], "no_objc_category_merging">, def object_path_lto : Separate<["-"], "object_path_lto">, MetaVarName<"<path>">, HelpText<"Retain any temporary mach-o file in <path> that would otherwise be deleted during LTO">, - Flags<[HelpHidden]>, Group<grp_rare>; def lto_library : Separate<["-"], "lto_library">, MetaVarName<"<path>">, - HelpText<"Override the default ../lib/libLTO.dylib as <path>">, - Flags<[HelpHidden]>, + HelpText<"Deprecated & ignored. 
LLD supports LTO directly, without using an external dylib.">, Group<grp_rare>; def cache_path_lto : Separate<["-"], "cache_path_lto">, MetaVarName<"<path>">, @@ -870,6 +892,12 @@ def not_for_dyld_shared_cache : Flag<["-"], "not_for_dyld_shared_cache">, HelpText<"Prevent system dylibs from being placed into the dylib shared cache">, Flags<[HelpHidden]>, Group<grp_rare>; +def mllvm : Separate<["-"], "mllvm">, + HelpText<"Options to pass to LLVM">, + Group<grp_rare>; +def mcpu : Separate<["-"], "mcpu">, + HelpText<"Processor family target for LTO code generation">, + Group<grp_rare>; def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">; @@ -1095,13 +1123,7 @@ def debug_snapshot : Flag<["-"], "debug_snapshot">, Flags<[HelpHidden]>, Group<grp_undocumented>; def demangle : Flag<["-"], "demangle">, - HelpText<"This option is undocumented in ld64">, - Flags<[HelpHidden]>, - Group<grp_undocumented>; -def dependency_info : Flag<["-"], "dependency_info">, - HelpText<"This option is undocumented in ld64">, - Flags<[HelpHidden]>, - Group<grp_undocumented>; + HelpText<"Demangle symbol names in diagnostics">; def dyld_env : Flag<["-"], "dyld_env">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, @@ -1208,14 +1230,6 @@ def uikitformac_version_min : Flag<["-"], "uikitformac_version_min">, HelpText<"Alias for -maccatalyst_version_min">, Flags<[HelpHidden]>, Group<grp_undocumented>; -def mcpu : Flag<["-"], "mcpu">, - HelpText<"This option is undocumented in ld64">, - Flags<[HelpHidden]>, - Group<grp_undocumented>; -def mllvm : Flag<["-"], "mllvm">, - HelpText<"This option is undocumented in ld64">, - Flags<[HelpHidden]>, - Group<grp_undocumented>; def no_compact_unwind : Flag<["-"], "no_compact_unwind">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, @@ -1232,7 +1246,7 @@ def no_new_main : Flag<["-"], "no_new_main">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, Group<grp_undocumented>; -def 
objc_abi_version : Flag<["-"], "objc_abi_version">, +def objc_abi_version : Separate<["-"], "objc_abi_version">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, Group<grp_undocumented>; @@ -1244,14 +1258,6 @@ def random_uuid : Flag<["-"], "random_uuid">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, Group<grp_undocumented>; -def save_temps : Flag<["-"], "save-temps">, - HelpText<"This option is undocumented in ld64">, - Flags<[HelpHidden]>, - Group<grp_undocumented>; -def sdk_version : Flag<["-"], "sdk_version">, - HelpText<"This option is undocumented in ld64">, - Flags<[HelpHidden]>, - Group<grp_undocumented>; def simulator_support : Flag<["-"], "simulator_support">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, diff --git a/contrib/llvm-project/lld/MachO/OutputSection.h b/contrib/llvm-project/lld/MachO/OutputSection.h index 07b53a04639f..c526a8343afe 100644 --- a/contrib/llvm-project/lld/MachO/OutputSection.h +++ b/contrib/llvm-project/lld/MachO/OutputSection.h @@ -63,6 +63,8 @@ public: uint64_t fileOff = 0; uint32_t align = 1; uint32_t flags = 0; + uint32_t reserved1 = 0; + uint32_t reserved2 = 0; private: Kind sectionKind; diff --git a/contrib/llvm-project/lld/MachO/OutputSegment.cpp b/contrib/llvm-project/lld/MachO/OutputSegment.cpp index 5e57c49f5c0a..c00e819ea3a4 100644 --- a/contrib/llvm-project/lld/MachO/OutputSegment.cpp +++ b/contrib/llvm-project/lld/MachO/OutputSegment.cpp @@ -31,9 +31,9 @@ static uint32_t initProt(StringRef name) { } static uint32_t maxProt(StringRef name) { - if (name == segment_names::pageZero) - return 0; - return VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; + assert(config->arch != AK_i386 && + "TODO: i386 has different maxProt requirements"); + return initProt(name); } size_t OutputSegment::numNonHiddenSections() const { @@ -49,12 +49,12 @@ void OutputSegment::addOutputSection(OutputSection *osec) { sections.push_back(osec); } -static 
llvm::DenseMap<StringRef, OutputSegment *> nameToOutputSegment; +static DenseMap<StringRef, OutputSegment *> nameToOutputSegment; std::vector<OutputSegment *> macho::outputSegments; OutputSegment *macho::getOrCreateOutputSegment(StringRef name) { OutputSegment *&segRef = nameToOutputSegment[name]; - if (segRef != nullptr) + if (segRef) return segRef; segRef = make<OutputSegment>(); diff --git a/contrib/llvm-project/lld/MachO/OutputSegment.h b/contrib/llvm-project/lld/MachO/OutputSegment.h index d977c281272f..63b62d5e9109 100644 --- a/contrib/llvm-project/lld/MachO/OutputSegment.h +++ b/contrib/llvm-project/lld/MachO/OutputSegment.h @@ -22,6 +22,8 @@ constexpr const char text[] = "__TEXT"; constexpr const char data[] = "__DATA"; constexpr const char linkEdit[] = "__LINKEDIT"; constexpr const char dataConst[] = "__DATA_CONST"; +constexpr const char ld[] = "__LD"; // output only with -r +constexpr const char dwarf[] = "__DWARF"; } // namespace segment_names diff --git a/contrib/llvm-project/lld/MachO/SymbolTable.cpp b/contrib/llvm-project/lld/MachO/SymbolTable.cpp index 80e870d79890..2f0844fadaaa 100644 --- a/contrib/llvm-project/lld/MachO/SymbolTable.cpp +++ b/contrib/llvm-project/lld/MachO/SymbolTable.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "SymbolTable.h" +#include "Config.h" #include "InputFiles.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" @@ -17,7 +18,7 @@ using namespace lld; using namespace lld::macho; Symbol *SymbolTable::find(StringRef name) { - auto it = symMap.find(llvm::CachedHashStringRef(name)); + auto it = symMap.find(CachedHashStringRef(name)); if (it == symMap.end()) return nullptr; return symVector[it->second]; @@ -37,51 +38,150 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) { } Symbol *SymbolTable::addDefined(StringRef name, InputSection *isec, - uint32_t value) { + uint32_t value, bool isWeakDef, + bool isPrivateExtern) { Symbol *s; bool 
wasInserted; + bool overridesWeakDef = false; std::tie(s, wasInserted) = insert(name); - if (!wasInserted && isa<Defined>(s)) - error("duplicate symbol: " + name); - - replaceSymbol<Defined>(s, name, isec, value); + if (!wasInserted) { + if (auto *defined = dyn_cast<Defined>(s)) { + if (isWeakDef) { + // Both old and new symbol weak (e.g. inline function in two TUs): + // If one of them isn't private extern, the merged symbol isn't. + if (defined->isWeakDef()) + defined->privateExtern &= isPrivateExtern; + return s; + } + if (!defined->isWeakDef()) + error("duplicate symbol: " + name); + } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { + overridesWeakDef = !isWeakDef && dysym->isWeakDef(); + } + // Defined symbols take priority over other types of symbols, so in case + // of a name conflict, we fall through to the replaceSymbol() call below. + } + + Defined *defined = + replaceSymbol<Defined>(s, name, isec, value, isWeakDef, + /*isExternal=*/true, isPrivateExtern); + defined->overridesWeakDef = overridesWeakDef; return s; } -Symbol *SymbolTable::addUndefined(StringRef name) { +Symbol *SymbolTable::addUndefined(StringRef name, bool isWeakRef) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name); + auto refState = isWeakRef ? 
RefState::Weak : RefState::Strong; + if (wasInserted) - replaceSymbol<Undefined>(s, name); - else if (LazySymbol *lazy = dyn_cast<LazySymbol>(s)) + replaceSymbol<Undefined>(s, name, refState); + else if (auto *lazy = dyn_cast<LazySymbol>(s)) lazy->fetchArchiveMember(); + else if (auto *dynsym = dyn_cast<DylibSymbol>(s)) + dynsym->refState = std::max(dynsym->refState, refState); + else if (auto *undefined = dyn_cast<Undefined>(s)) + undefined->refState = std::max(undefined->refState, refState); return s; } -Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file) { +Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size, + uint32_t align, bool isPrivateExtern) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name); - if (wasInserted || isa<Undefined>(s)) - replaceSymbol<DylibSymbol>(s, file, name); + if (!wasInserted) { + if (auto *common = dyn_cast<CommonSymbol>(s)) { + if (size < common->size) + return s; + } else if (isa<Defined>(s)) { + return s; + } + // Common symbols take priority over all non-Defined symbols, so in case of + // a name conflict, we fall through to the replaceSymbol() call below. 
+ } + + replaceSymbol<CommonSymbol>(s, name, file, size, align, isPrivateExtern); + return s; +} + +Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef, + bool isTlv) { + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(name); + + auto refState = RefState::Unreferenced; + if (!wasInserted) { + if (auto *defined = dyn_cast<Defined>(s)) { + if (isWeakDef && !defined->isWeakDef()) + defined->overridesWeakDef = true; + } else if (auto *undefined = dyn_cast<Undefined>(s)) { + refState = undefined->refState; + } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { + refState = dysym->refState; + } + } + + if (wasInserted || isa<Undefined>(s) || + (isa<DylibSymbol>(s) && !isWeakDef && s->isWeakDef())) + replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv); + return s; } Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file, - const llvm::object::Archive::Symbol &sym) { + const object::Archive::Symbol &sym) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name); if (wasInserted) replaceSymbol<LazySymbol>(s, file, sym); - else if (isa<Undefined>(s)) + else if (isa<Undefined>(s) || (isa<DylibSymbol>(s) && s->isWeakDef())) file->fetch(sym); return s; } +Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) { + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(DSOHandle::name); + if (!wasInserted) { + // FIXME: Make every symbol (including absolute symbols) contain a + // reference to their originating file, then add that file name to this + // error message. 
+ if (isa<Defined>(s)) + error("found defined symbol with illegal name " + DSOHandle::name); + } + replaceSymbol<DSOHandle>(s, header); + return s; +} + +void lld::macho::treatUndefinedSymbol(StringRef symbolName, + StringRef fileName) { + std::string message = ("undefined symbol: " + symbolName).str(); + if (!fileName.empty()) + message += ("\n>>> referenced by " + fileName).str(); + switch (config->undefinedSymbolTreatment) { + case UndefinedSymbolTreatment::suppress: + break; + case UndefinedSymbolTreatment::error: + error(message); + break; + case UndefinedSymbolTreatment::warning: + warn(message); + break; + case UndefinedSymbolTreatment::dynamic_lookup: + error("dynamic_lookup unimplemented for " + message); + break; + case UndefinedSymbolTreatment::unknown: + llvm_unreachable("unknown -undefined TREATMENT"); + } +} + SymbolTable *macho::symtab; diff --git a/contrib/llvm-project/lld/MachO/SymbolTable.h b/contrib/llvm-project/lld/MachO/SymbolTable.h index 2379008db56d..871687f75eb7 100644 --- a/contrib/llvm-project/lld/MachO/SymbolTable.h +++ b/contrib/llvm-project/lld/MachO/SymbolTable.h @@ -19,20 +19,34 @@ namespace macho { class ArchiveFile; class DylibFile; +class InputFile; class InputSection; +class MachHeaderSection; class Symbol; +/* + * Note that the SymbolTable handles name collisions by calling + * replaceSymbol(), which does an in-place update of the Symbol via `placement + * new`. Therefore, there is no need to update any relocations that hold + * pointers the "old" Symbol -- they will automatically point to the new one. 
+ */ class SymbolTable { public: - Symbol *addDefined(StringRef name, InputSection *isec, uint32_t value); + Symbol *addDefined(StringRef name, InputSection *isec, uint32_t value, + bool isWeakDef, bool isPrivateExtern); - Symbol *addUndefined(StringRef name); + Symbol *addUndefined(StringRef name, bool isWeakRef); - Symbol *addDylib(StringRef name, DylibFile *file); + Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align, + bool isPrivateExtern); + + Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv); Symbol *addLazy(StringRef name, ArchiveFile *file, const llvm::object::Archive::Symbol &sym); + Symbol *addDSOHandle(const MachHeaderSection *); + ArrayRef<Symbol *> getSymbols() const { return symVector; } Symbol *find(StringRef name); @@ -42,6 +56,8 @@ private: std::vector<Symbol *> symVector; }; +extern void treatUndefinedSymbol(StringRef symbolName, StringRef fileName); + extern SymbolTable *symtab; } // namespace macho diff --git a/contrib/llvm-project/lld/MachO/Symbols.cpp b/contrib/llvm-project/lld/MachO/Symbols.cpp index fbafa8a92a4f..4c83188fd259 100644 --- a/contrib/llvm-project/lld/MachO/Symbols.cpp +++ b/contrib/llvm-project/lld/MachO/Symbols.cpp @@ -8,16 +8,46 @@ #include "Symbols.h" #include "InputFiles.h" +#include "SyntheticSections.h" using namespace llvm; using namespace lld; using namespace lld::macho; -void LazySymbol::fetchArchiveMember() { file->fetch(sym); } - // Returns a symbol for an error message. 
+static std::string demangle(StringRef symName) { + if (config->demangle) + return demangleItanium(symName); + return std::string(symName); +} + std::string lld::toString(const Symbol &sym) { - if (Optional<std::string> s = demangleItanium(sym.getName())) - return *s; - return std::string(sym.getName()); + return demangle(sym.getName()); +} + +std::string lld::toMachOString(const object::Archive::Symbol &b) { + return demangle(b.getName()); +} + +uint64_t Defined::getVA() const { + if (isAbsolute()) + return value; + return isec->getVA() + value; } + +uint64_t Defined::getFileOffset() const { + if (isAbsolute()) { + error("absolute symbol " + toString(*this) + + " does not have a file offset"); + return 0; + } + return isec->getFileOffset() + value; +} + +void LazySymbol::fetchArchiveMember() { file->fetch(sym); } + +uint64_t DSOHandle::getVA() const { return header->addr; } + +uint64_t DSOHandle::getFileOffset() const { return header->fileOff; } + +constexpr StringRef DSOHandle::name; diff --git a/contrib/llvm-project/lld/MachO/Symbols.h b/contrib/llvm-project/lld/MachO/Symbols.h index 63748ee48324..7f987c722a1f 100644 --- a/contrib/llvm-project/lld/MachO/Symbols.h +++ b/contrib/llvm-project/lld/MachO/Symbols.h @@ -14,11 +14,13 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Strings.h" #include "llvm/Object/Archive.h" +#include "llvm/Support/MathExtras.h" namespace lld { namespace macho { class InputSection; +class MachHeaderSection; class DylibFile; class ArchiveFile; @@ -35,55 +37,174 @@ public: enum Kind { DefinedKind, UndefinedKind, + CommonKind, DylibKind, LazyKind, + DSOHandleKind, }; + virtual ~Symbol() {} + Kind kind() const { return static_cast<Kind>(symbolKind); } - StringRef getName() const { return {name.data, name.size}; } + StringRef getName() const { + if (nameSize == (uint32_t)-1) + nameSize = strlen(nameData); + return {nameData, nameSize}; + } + + virtual uint64_t getVA() const { return 0; } + + virtual uint64_t getFileOffset() const 
{ + llvm_unreachable("attempt to get an offset from a non-defined symbol"); + } + + virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } + + // Only undefined or dylib symbols can be weak references. A weak reference + // need not be satisfied at runtime, e.g. due to the symbol not being + // available on a given target platform. + virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } - uint64_t getVA() const; + virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } - uint64_t getFileOffset() const; + // Whether this symbol is in the GOT or TLVPointer sections. + bool isInGot() const { return gotIndex != UINT32_MAX; } + // Whether this symbol is in the StubsSection. + bool isInStubs() const { return stubsIndex != UINT32_MAX; } + + // The index of this symbol in the GOT or the TLVPointer section, depending + // on whether it is a thread-local. A given symbol cannot be referenced by + // both these sections at once. uint32_t gotIndex = UINT32_MAX; + uint32_t stubsIndex = UINT32_MAX; + + uint32_t symtabIndex = UINT32_MAX; + protected: - Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {} + Symbol(Kind k, StringRefZ name) + : symbolKind(k), nameData(name.data), nameSize(name.size) {} Kind symbolKind; - StringRefZ name; + const char *nameData; + mutable uint32_t nameSize; }; class Defined : public Symbol { public: - Defined(StringRefZ name, InputSection *isec, uint32_t value) - : Symbol(DefinedKind, name), isec(isec), value(value) {} + Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef, + bool isExternal, bool isPrivateExtern) + : Symbol(DefinedKind, name), isec(isec), value(value), + overridesWeakDef(false), privateExtern(isPrivateExtern), + weakDef(isWeakDef), external(isExternal) {} + + bool isWeakDef() const override { return weakDef; } + bool isExternalWeakDef() const { + return isWeakDef() && isExternal() && !privateExtern; + } + bool isTlv() const override { + return 
!isAbsolute() && isThreadLocalVariables(isec->flags); + } + + bool isExternal() const { return external; } + bool isAbsolute() const { return isec == nullptr; } + + uint64_t getVA() const override; + uint64_t getFileOffset() const override; + + static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } InputSection *isec; uint32_t value; - static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } + bool overridesWeakDef : 1; + bool privateExtern : 1; + +private: + const bool weakDef : 1; + const bool external : 1; }; +// This enum does double-duty: as a symbol property, it indicates whether & how +// a dylib symbol is referenced. As a DylibFile property, it indicates the kind +// of referenced symbols contained within the file. If there are both weak +// and strong references to the same file, we will count the file as +// strongly-referenced. +enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; + class Undefined : public Symbol { public: - Undefined(StringRefZ name) : Symbol(UndefinedKind, name) {} + Undefined(StringRefZ name, RefState refState) + : Symbol(UndefinedKind, name), refState(refState) { + assert(refState != RefState::Unreferenced); + } + + bool isWeakRef() const override { return refState == RefState::Weak; } static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } + + RefState refState : 2; +}; + +// On Unix, it is traditionally allowed to write variable definitions without +// initialization expressions (such as "int foo;") to header files. These are +// called tentative definitions. +// +// Using tentative definitions is usually considered a bad practice; you should +// write only declarations (such as "extern int foo;") to header files. +// Nevertheless, the linker and the compiler have to do something to support +// bad code by allowing duplicate definitions for this particular case. +// +// The compiler creates common symbols when it sees tentative definitions. 
+// (You can suppress this behavior and let the compiler create a regular +// defined symbol by passing -fno-common. -fno-common is the default in clang +// as of LLVM 11.0.) When linking the final binary, if there are remaining +// common symbols after name resolution is complete, the linker converts them +// to regular defined symbols in a __common section. +class CommonSymbol : public Symbol { +public: + CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, + bool isPrivateExtern) + : Symbol(CommonKind, name), file(file), size(size), + align(align != 1 ? align : llvm::PowerOf2Ceil(size)), + privateExtern(isPrivateExtern) { + // TODO: cap maximum alignment + } + + static bool classof(const Symbol *s) { return s->kind() == CommonKind; } + + InputFile *const file; + const uint64_t size; + const uint32_t align; + const bool privateExtern; }; class DylibSymbol : public Symbol { public: - DylibSymbol(DylibFile *file, StringRefZ name) - : Symbol(DylibKind, name), file(file) {} + DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, + RefState refState, bool isTlv) + : Symbol(DylibKind, name), file(file), refState(refState), + weakDef(isWeakDef), tlv(isTlv) {} + + bool isWeakDef() const override { return weakDef; } + bool isWeakRef() const override { return refState == RefState::Weak; } + bool isReferenced() const { return refState != RefState::Unreferenced; } + bool isTlv() const override { return tlv; } + bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } static bool classof(const Symbol *s) { return s->kind() == DylibKind; } DylibFile *file; - uint32_t stubsIndex = UINT32_MAX; + uint32_t stubsHelperIndex = UINT32_MAX; uint32_t lazyBindOffset = UINT32_MAX; + + RefState refState : 2; + +private: + const bool weakDef : 1; + const bool tlv : 1; }; class LazySymbol : public Symbol { @@ -100,39 +221,62 @@ private: const llvm::object::Archive::Symbol sym; }; -inline uint64_t Symbol::getVA() const { - if (auto *d = 
dyn_cast<Defined>(this)) - return d->isec->getVA() + d->value; - return 0; -} +// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which +// does e.g. cleanup of static global variables. The ABI document says that the +// pointer can point to any address in one of the dylib's segments, but in +// practice ld64 seems to set it to point to the header, so that's what's +// implemented here. +// +// The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet +// tested this on an ARM platform. +// +// DSOHandle effectively functions like a Defined symbol, but it doesn't belong +// to an InputSection. +class DSOHandle : public Symbol { +public: + DSOHandle(const MachHeaderSection *header) + : Symbol(DSOHandleKind, name), header(header) {} -inline uint64_t Symbol::getFileOffset() const { - if (auto *d = dyn_cast<Defined>(this)) - return d->isec->getFileOffset() + d->value; - llvm_unreachable("attempt to get an offset from an undefined symbol"); -} + const MachHeaderSection *header; + + uint64_t getVA() const override; + + uint64_t getFileOffset() const override; + + bool isWeakDef() const override { return false; } + + bool isTlv() const override { return false; } + + static constexpr StringRef name = "___dso_handle"; + + static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; } +}; union SymbolUnion { alignas(Defined) char a[sizeof(Defined)]; alignas(Undefined) char b[sizeof(Undefined)]; - alignas(DylibSymbol) char c[sizeof(DylibSymbol)]; - alignas(LazySymbol) char d[sizeof(LazySymbol)]; + alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; + alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; + alignas(LazySymbol) char e[sizeof(LazySymbol)]; + alignas(DSOHandle) char f[sizeof(DSOHandle)]; }; template <typename T, typename... ArgT> -void replaceSymbol(Symbol *s, ArgT &&... arg) { +T *replaceSymbol(Symbol *s, ArgT &&... 
arg) { static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); static_assert(alignof(T) <= alignof(SymbolUnion), "SymbolUnion not aligned enough"); assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && "Not a Symbol"); - new (s) T(std::forward<ArgT>(arg)...); + return new (s) T(std::forward<ArgT>(arg)...); } } // namespace macho std::string toString(const macho::Symbol &); +std::string toMachOString(const llvm::object::Archive::Symbol &); + } // namespace lld #endif diff --git a/contrib/llvm-project/lld/MachO/SyntheticSections.cpp b/contrib/llvm-project/lld/MachO/SyntheticSections.cpp index cc0d5a93c40d..3f5413696d4b 100644 --- a/contrib/llvm-project/lld/MachO/SyntheticSections.cpp +++ b/contrib/llvm-project/lld/MachO/SyntheticSections.cpp @@ -19,8 +19,11 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/EndianStream.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/Path.h" using namespace llvm; using namespace llvm::support; @@ -47,7 +50,7 @@ void MachHeaderSection::addLoadCommand(LoadCommand *lc) { } uint64_t MachHeaderSection::getSize() const { - return sizeof(MachO::mach_header_64) + sizeOfCmds; + return sizeof(MachO::mach_header_64) + sizeOfCmds + config->headerPad; } void MachHeaderSection::writeTo(uint8_t *buf) const { @@ -59,9 +62,28 @@ void MachHeaderSection::writeTo(uint8_t *buf) const { hdr->ncmds = loadCommands.size(); hdr->sizeofcmds = sizeOfCmds; hdr->flags = MachO::MH_NOUNDEFS | MachO::MH_DYLDLINK | MachO::MH_TWOLEVEL; + if (config->outputType == MachO::MH_DYLIB && !config->hasReexports) hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS; + if (config->outputType == MachO::MH_EXECUTE && config->isPic) + hdr->flags |= MachO::MH_PIE; + + if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition()) + hdr->flags |= MachO::MH_WEAK_DEFINES; + + if (in.exports->hasWeakSymbol || 
in.weakBinding->hasEntry()) + hdr->flags |= MachO::MH_BINDS_TO_WEAK; + + for (OutputSegment *seg : outputSegments) { + for (OutputSection *osec : seg->getSections()) { + if (isThreadLocalVariables(osec->flags)) { + hdr->flags |= MachO::MH_HAS_TLV_DESCRIPTORS; + break; + } + } + } + uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1); for (LoadCommand *lc : loadCommands) { lc->writeTo(p); @@ -72,33 +94,121 @@ void MachHeaderSection::writeTo(uint8_t *buf) const { PageZeroSection::PageZeroSection() : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} -GotSection::GotSection() - : SyntheticSection(segment_names::dataConst, section_names::got) { +uint64_t Location::getVA() const { + if (const auto *isec = section.dyn_cast<const InputSection *>()) + return isec->getVA() + offset; + return section.get<const OutputSection *>()->addr + offset; +} + +RebaseSection::RebaseSection() + : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} + +namespace { +struct Rebase { + OutputSegment *segment = nullptr; + uint64_t offset = 0; + uint64_t consecutiveCount = 0; +}; +} // namespace + +// Rebase opcodes allow us to describe a contiguous sequence of rebase location +// using a single DO_REBASE opcode. To take advantage of it, we delay emitting +// `DO_REBASE` until we have reached the end of a contiguous sequence. 
+static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) { + using namespace llvm::MachO; + assert(rebase.consecutiveCount != 0); + if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) { + os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES | + rebase.consecutiveCount); + } else { + os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); + encodeULEB128(rebase.consecutiveCount, os); + } + rebase.consecutiveCount = 0; +} + +static void encodeRebase(const OutputSection *osec, uint64_t outSecOff, + Rebase &lastRebase, raw_svector_ostream &os) { + using namespace llvm::MachO; + OutputSegment *seg = osec->parent; + uint64_t offset = osec->getSegmentOffset() + outSecOff; + if (lastRebase.segment != seg || lastRebase.offset != offset) { + if (lastRebase.consecutiveCount != 0) + encodeDoRebase(lastRebase, os); + + if (lastRebase.segment != seg) { + os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | + seg->index); + encodeULEB128(offset, os); + lastRebase.segment = seg; + lastRebase.offset = offset; + } else { + assert(lastRebase.offset != offset); + os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB); + encodeULEB128(offset - lastRebase.offset, os); + lastRebase.offset = offset; + } + } + ++lastRebase.consecutiveCount; + // DO_REBASE causes dyld to both perform the binding and increment the offset + lastRebase.offset += WordSize; +} + +void RebaseSection::finalizeContents() { + using namespace llvm::MachO; + if (locations.empty()) + return; + + raw_svector_ostream os{contents}; + Rebase lastRebase; + + os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); + + llvm::sort(locations, [](const Location &a, const Location &b) { + return a.getVA() < b.getVA(); + }); + for (const Location &loc : locations) { + if (const auto *isec = loc.section.dyn_cast<const InputSection *>()) { + encodeRebase(isec->parent, isec->outSecOff + loc.offset, lastRebase, os); + } else { + const auto *osec = 
loc.section.get<const OutputSection *>(); + encodeRebase(osec, loc.offset, lastRebase, os); + } + } + if (lastRebase.consecutiveCount != 0) + encodeDoRebase(lastRebase, os); + + os << static_cast<uint8_t>(REBASE_OPCODE_DONE); +} + +void RebaseSection::writeTo(uint8_t *buf) const { + memcpy(buf, contents.data(), contents.size()); +} + +NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname, + const char *name) + : SyntheticSection(segname, name) { align = 8; flags = MachO::S_NON_LAZY_SYMBOL_POINTERS; - - // TODO: section_64::reserved1 should be an index into the indirect symbol - // table, which we do not currently emit } -void GotSection::addEntry(Symbol &sym) { - if (entries.insert(&sym)) { - sym.gotIndex = entries.size() - 1; +void NonLazyPointerSectionBase::addEntry(Symbol *sym) { + if (entries.insert(sym)) { + assert(!sym->isInGot()); + sym->gotIndex = entries.size() - 1; + + addNonLazyBindingEntries(sym, this, sym->gotIndex * WordSize); } } -void GotSection::writeTo(uint8_t *buf) const { +void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const { for (size_t i = 0, n = entries.size(); i < n; ++i) if (auto *defined = dyn_cast<Defined>(entries[i])) write64le(&buf[i * WordSize], defined->getVA()); } BindingSection::BindingSection() - : SyntheticSection(segment_names::linkEdit, section_names::binding) {} - -bool BindingSection::isNeeded() const { - return bindings.size() != 0 || in.got->isNeeded(); -} + : LinkEditSection(segment_names::linkEdit, section_names::binding) {} namespace { struct Binding { @@ -109,15 +219,16 @@ struct Binding { }; } // namespace -// Encode a sequence of opcodes that tell dyld to write the address of dysym + +// Encode a sequence of opcodes that tell dyld to write the address of symbol + // addend at osec->addr + outSecOff. // // The bind opcode "interpreter" remembers the values of each binding field, so // we only need to encode the differences between bindings. Hence the use of // lastBinding. 
-static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec, +static void encodeBinding(const Symbol *sym, const OutputSection *osec, uint64_t outSecOff, int64_t addend, - Binding &lastBinding, raw_svector_ostream &os) { + bool isWeakBinding, Binding &lastBinding, + raw_svector_ostream &os) { using namespace llvm::MachO; OutputSegment *seg = osec->parent; uint64_t offset = osec->getSegmentOffset() + outSecOff; @@ -128,37 +239,52 @@ static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec, lastBinding.segment = seg; lastBinding.offset = offset; } else if (lastBinding.offset != offset) { - assert(lastBinding.offset <= offset); os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB); encodeULEB128(offset - lastBinding.offset, os); lastBinding.offset = offset; } - if (lastBinding.ordinal != dysym.file->ordinal) { - if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) { - os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | - dysym.file->ordinal); - } else { - error("TODO: Support larger dylib symbol ordinals"); - return; - } - lastBinding.ordinal = dysym.file->ordinal; - } - if (lastBinding.addend != addend) { os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB); encodeSLEB128(addend, os); lastBinding.addend = addend; } - os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) - << dysym.getName() << '\0' + uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; + if (!isWeakBinding && sym->isWeakRef()) + flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; + + os << flags << sym->getName() << '\0' << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) << static_cast<uint8_t>(BIND_OPCODE_DO_BIND); // DO_BIND causes dyld to both perform the binding and increment the offset lastBinding.offset += WordSize; } +// Non-weak bindings need to have their dylib ordinal encoded as well. 
+static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding &lastBinding, + raw_svector_ostream &os) { + using namespace llvm::MachO; + if (lastBinding.ordinal != dysym->file->ordinal) { + if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) { + os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + dysym->file->ordinal); + } else { + os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + encodeULEB128(dysym->file->ordinal, os); + } + lastBinding.ordinal = dysym->file->ordinal; + } +} + +static void encodeWeakOverride(const Defined *defined, + raw_svector_ostream &os) { + using namespace llvm::MachO; + os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | + BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) + << defined->getName() << '\0'; +} + // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld // interprets to update a record with the following fields: // * segment index (of the segment to write the symbol addresses to, typically @@ -175,37 +301,25 @@ static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec, void BindingSection::finalizeContents() { raw_svector_ostream os{contents}; Binding lastBinding; - bool didEncode = false; - size_t gotIdx = 0; - for (const Symbol *sym : in.got->getEntries()) { - if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { - didEncode = true; - encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os); - } - ++gotIdx; - } - // Sorting the relocations by segment and address allows us to encode them - // more compactly. + // Since bindings are delta-encoded, sorting them allows for a more compact + // result. Note that sorting by address alone ensures that bindings for the + // same segment / section are located together. 
llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { - OutputSegment *segA = a.isec->parent->parent; - OutputSegment *segB = b.isec->parent->parent; - if (segA != segB) - return segA->fileOff < segB->fileOff; - OutputSection *osecA = a.isec->parent; - OutputSection *osecB = b.isec->parent; - if (osecA != osecB) - return osecA->addr < osecB->addr; - if (a.isec != b.isec) - return a.isec->outSecOff < b.isec->outSecOff; - return a.offset < b.offset; + return a.target.getVA() < b.target.getVA(); }); for (const BindingEntry &b : bindings) { - didEncode = true; - encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset, - b.addend, lastBinding, os); + encodeDylibOrdinal(b.dysym, lastBinding, os); + if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) { + encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset, + b.addend, /*isWeakBinding=*/false, lastBinding, os); + } else { + auto *osec = b.target.section.get<const OutputSection *>(); + encodeBinding(b.dysym, osec, b.target.offset, b.addend, + /*isWeakBinding=*/false, lastBinding, os); + } } - if (didEncode) + if (!bindings.empty()) os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE); } @@ -213,8 +327,73 @@ void BindingSection::writeTo(uint8_t *buf) const { memcpy(buf, contents.data(), contents.size()); } +WeakBindingSection::WeakBindingSection() + : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {} + +void WeakBindingSection::finalizeContents() { + raw_svector_ostream os{contents}; + Binding lastBinding; + + for (const Defined *defined : definitions) + encodeWeakOverride(defined, os); + + // Since bindings are delta-encoded, sorting them allows for a more compact + // result. 
+ llvm::sort(bindings, + [](const WeakBindingEntry &a, const WeakBindingEntry &b) { + return a.target.getVA() < b.target.getVA(); + }); + for (const WeakBindingEntry &b : bindings) { + if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) { + encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset, + b.addend, /*isWeakBinding=*/true, lastBinding, os); + } else { + auto *osec = b.target.section.get<const OutputSection *>(); + encodeBinding(b.symbol, osec, b.target.offset, b.addend, + /*isWeakBinding=*/true, lastBinding, os); + } + } + if (!bindings.empty() || !definitions.empty()) + os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE); +} + +void WeakBindingSection::writeTo(uint8_t *buf) const { + memcpy(buf, contents.data(), contents.size()); +} + +bool macho::needsBinding(const Symbol *sym) { + if (isa<DylibSymbol>(sym)) + return true; + if (const auto *defined = dyn_cast<Defined>(sym)) + return defined->isExternalWeakDef(); + return false; +} + +void macho::addNonLazyBindingEntries(const Symbol *sym, + SectionPointerUnion section, + uint64_t offset, int64_t addend) { + if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { + in.binding->addEntry(dysym, section, offset, addend); + if (dysym->isWeakDef()) + in.weakBinding->addEntry(sym, section, offset, addend); + } else if (auto *defined = dyn_cast<Defined>(sym)) { + in.rebase->addEntry(section, offset); + if (defined->isExternalWeakDef()) + in.weakBinding->addEntry(sym, section, offset, addend); + } else if (isa<DSOHandle>(sym)) { + error("cannot bind to " + DSOHandle::name); + } else { + // Undefined symbols are filtered out in scanRelocations(); we should never + // get here + llvm_unreachable("cannot bind to an undefined symbol"); + } +} + StubsSection::StubsSection() - : SyntheticSection(segment_names::text, "__stubs") {} + : SyntheticSection(segment_names::text, "__stubs") { + flags = MachO::S_SYMBOL_STUBS; + reserved2 = target->stubSize; +} uint64_t StubsSection::getSize() const { 
return entries.size() * target->stubSize; @@ -222,15 +401,17 @@ uint64_t StubsSection::getSize() const { void StubsSection::writeTo(uint8_t *buf) const { size_t off = 0; - for (const DylibSymbol *sym : in.stubs->getEntries()) { + for (const Symbol *sym : entries) { target->writeStub(buf + off, *sym); off += target->stubSize; } } -void StubsSection::addEntry(DylibSymbol &sym) { - if (entries.insert(&sym)) - sym.stubsIndex = entries.size() - 1; +bool StubsSection::addEntry(Symbol *sym) { + bool inserted = entries.insert(sym); + if (inserted) + sym->stubsIndex = entries.size() - 1; + return inserted; } StubHelperSection::StubHelperSection() @@ -238,17 +419,15 @@ StubHelperSection::StubHelperSection() uint64_t StubHelperSection::getSize() const { return target->stubHelperHeaderSize + - in.stubs->getEntries().size() * target->stubHelperEntrySize; + in.lazyBinding->getEntries().size() * target->stubHelperEntrySize; } -bool StubHelperSection::isNeeded() const { - return !in.stubs->getEntries().empty(); -} +bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); } void StubHelperSection::writeTo(uint8_t *buf) const { target->writeStubHelperHeader(buf); size_t off = target->stubHelperHeaderSize; - for (const DylibSymbol *sym : in.stubs->getEntries()) { + for (const DylibSymbol *sym : in.lazyBinding->getEntries()) { target->writeStubHelperEntry(buf + off, *sym, addr + off); off += target->stubHelperEntrySize; } @@ -261,10 +440,14 @@ void StubHelperSection::setup() { "Needed to perform lazy binding."); return; } - in.got->addEntry(*stubBinder); + stubBinder->refState = RefState::Strong; + in.got->addEntry(stubBinder); inputSections.push_back(in.imageLoaderCache); - symtab->addDefined("__dyld_private", in.imageLoaderCache, 0); + dyldPrivate = + make<Defined>("__dyld_private", in.imageLoaderCache, 0, + /*isWeakDef=*/false, + /*isExternal=*/false, /*isPrivateExtern=*/false); } ImageLoaderCacheSection::ImageLoaderCacheSection() { @@ -291,23 +474,28 @@ bool 
LazyPointerSection::isNeeded() const { void LazyPointerSection::writeTo(uint8_t *buf) const { size_t off = 0; - for (const DylibSymbol *sym : in.stubs->getEntries()) { - uint64_t stubHelperOffset = target->stubHelperHeaderSize + - sym->stubsIndex * target->stubHelperEntrySize; - write64le(buf + off, in.stubHelper->addr + stubHelperOffset); + for (const Symbol *sym : in.stubs->getEntries()) { + if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { + if (dysym->hasStubsHelper()) { + uint64_t stubHelperOffset = + target->stubHelperHeaderSize + + dysym->stubsHelperIndex * target->stubHelperEntrySize; + write64le(buf + off, in.stubHelper->addr + stubHelperOffset); + } + } else { + write64le(buf + off, sym->getVA()); + } off += WordSize; } } LazyBindingSection::LazyBindingSection() - : SyntheticSection(segment_names::linkEdit, section_names::lazyBinding) {} - -bool LazyBindingSection::isNeeded() const { return in.stubs->isNeeded(); } + : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {} void LazyBindingSection::finalizeContents() { // TODO: Just precompute output size here instead of writing to a temporary // buffer - for (DylibSymbol *sym : in.stubs->getEntries()) + for (DylibSymbol *sym : entries) sym->lazyBindOffset = encode(*sym); } @@ -315,6 +503,13 @@ void LazyBindingSection::writeTo(uint8_t *buf) const { memcpy(buf, contents.data(), contents.size()); } +void LazyBindingSection::addEntry(DylibSymbol *dysym) { + if (entries.insert(dysym)) { + dysym->stubsHelperIndex = entries.size() - 1; + in.rebase->addEntry(in.lazyPointers, dysym->stubsIndex * WordSize); + } +} + // Unlike the non-lazy binding section, the bind opcodes in this section aren't // interpreted all at once. 
Rather, dyld will start interpreting opcodes at a // given offset, typically only binding a single symbol before it finds a @@ -329,73 +524,326 @@ uint32_t LazyBindingSection::encode(const DylibSymbol &sym) { uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr + sym.stubsIndex * WordSize; encodeULEB128(offset, os); - if (sym.file->ordinal <= MachO::BIND_IMMEDIATE_MASK) + if (sym.file->ordinal <= MachO::BIND_IMMEDIATE_MASK) { os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | sym.file->ordinal); - else - fatal("TODO: Support larger dylib symbol ordinals"); + } else { + os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + encodeULEB128(sym.file->ordinal, os); + } + + uint8_t flags = MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; + if (sym.isWeakRef()) + flags |= MachO::BIND_SYMBOL_FLAGS_WEAK_IMPORT; - os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) - << sym.getName() << '\0' + os << flags << sym.getName() << '\0' << static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND) << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE); return opstreamOffset; } +void macho::prepareBranchTarget(Symbol *sym) { + if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { + if (in.stubs->addEntry(dysym)) { + if (sym->isWeakDef()) { + in.binding->addEntry(dysym, in.lazyPointers, + sym->stubsIndex * WordSize); + in.weakBinding->addEntry(sym, in.lazyPointers, + sym->stubsIndex * WordSize); + } else { + in.lazyBinding->addEntry(dysym); + } + } + } else if (auto *defined = dyn_cast<Defined>(sym)) { + if (defined->isExternalWeakDef()) { + if (in.stubs->addEntry(sym)) { + in.rebase->addEntry(in.lazyPointers, sym->stubsIndex * WordSize); + in.weakBinding->addEntry(sym, in.lazyPointers, + sym->stubsIndex * WordSize); + } + } + } +} + ExportSection::ExportSection() - : SyntheticSection(segment_names::linkEdit, section_names::export_) {} + : LinkEditSection(segment_names::linkEdit, section_names::export_) {} void 
ExportSection::finalizeContents() { - // TODO: We should check symbol visibility. - for (const Symbol *sym : symtab->getSymbols()) - if (auto *defined = dyn_cast<Defined>(sym)) + trieBuilder.setImageBase(in.header->addr); + for (const Symbol *sym : symtab->getSymbols()) { + if (const auto *defined = dyn_cast<Defined>(sym)) { + if (defined->privateExtern) + continue; trieBuilder.addSymbol(*defined); + hasWeakSymbol = hasWeakSymbol || sym->isWeakDef(); + } + } size = trieBuilder.build(); } void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); } SymtabSection::SymtabSection(StringTableSection &stringTableSection) - : SyntheticSection(segment_names::linkEdit, section_names::symbolTable), - stringTableSection(stringTableSection) { - // TODO: When we introduce the SyntheticSections superclass, we should make - // all synthetic sections aligned to WordSize by default. - align = WordSize; -} + : LinkEditSection(segment_names::linkEdit, section_names::symbolTable), + stringTableSection(stringTableSection) {} + +uint64_t SymtabSection::getRawSize() const { + return getNumSymbols() * sizeof(structs::nlist_64); +} + +void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) { + StabsEntry stab(MachO::N_SO); + SmallString<261> dir(compileUnit->getCompilationDir()); + StringRef sep = sys::path::get_separator(); + // We don't use `path::append` here because we want an empty `dir` to result + // in an absolute path. `append` would give us a relative path for that case. 
+ if (!dir.endswith(sep)) + dir += sep; + stab.strx = stringTableSection.addString( + saver.save(dir + compileUnit->getUnitDIE().getShortName())); + stabs.emplace_back(std::move(stab)); +} + +void SymtabSection::emitEndSourceStab() { + StabsEntry stab(MachO::N_SO); + stab.sect = 1; + stabs.emplace_back(std::move(stab)); +} + +void SymtabSection::emitObjectFileStab(ObjFile *file) { + StabsEntry stab(MachO::N_OSO); + stab.sect = target->cpuSubtype; + SmallString<261> path(!file->archiveName.empty() ? file->archiveName + : file->getName()); + std::error_code ec = sys::fs::make_absolute(path); + if (ec) + fatal("failed to get absolute path for " + path); + + if (!file->archiveName.empty()) + path.append({"(", file->getName(), ")"}); + + stab.strx = stringTableSection.addString(saver.save(path.str())); + stab.desc = 1; + stab.value = file->modTime; + stabs.emplace_back(std::move(stab)); +} + +void SymtabSection::emitEndFunStab(Defined *defined) { + StabsEntry stab(MachO::N_FUN); + // FIXME this should be the size of the symbol. Using the section size in + // lieu is only correct if .subsections_via_symbols is set. + stab.value = defined->isec->getSize(); + stabs.emplace_back(std::move(stab)); +} + +void SymtabSection::emitStabs() { + std::vector<Defined *> symbolsNeedingStabs; + for (const SymtabEntry &entry : + concat<SymtabEntry>(localSymbols, externalSymbols)) { + Symbol *sym = entry.sym; + if (auto *defined = dyn_cast<Defined>(sym)) { + if (defined->isAbsolute()) + continue; + InputSection *isec = defined->isec; + ObjFile *file = dyn_cast_or_null<ObjFile>(isec->file); + if (!file || !file->compileUnit) + continue; + symbolsNeedingStabs.push_back(defined); + } + } + + llvm::stable_sort(symbolsNeedingStabs, [&](Defined *a, Defined *b) { + return a->isec->file->id < b->isec->file->id; + }); + + // Emit STABS symbols so that dsymutil and/or the debugger can map address + // regions in the final binary to the source and object files from which they + // originated. 
+ InputFile *lastFile = nullptr; + for (Defined *defined : symbolsNeedingStabs) { + InputSection *isec = defined->isec; + ObjFile *file = dyn_cast<ObjFile>(isec->file); + assert(file); + + if (lastFile == nullptr || lastFile != file) { + if (lastFile != nullptr) + emitEndSourceStab(); + lastFile = file; + + emitBeginSourceStab(file->compileUnit); + emitObjectFileStab(file); + } -uint64_t SymtabSection::getSize() const { - return symbols.size() * sizeof(structs::nlist_64); + StabsEntry symStab; + symStab.sect = defined->isec->parent->index; + symStab.strx = stringTableSection.addString(defined->getName()); + symStab.value = defined->getVA(); + + if (isCodeSection(isec)) { + symStab.type = MachO::N_FUN; + stabs.emplace_back(std::move(symStab)); + emitEndFunStab(defined); + } else { + symStab.type = defined->isExternal() ? MachO::N_GSYM : MachO::N_STSYM; + stabs.emplace_back(std::move(symStab)); + } + } + + if (!stabs.empty()) + emitEndSourceStab(); } void SymtabSection::finalizeContents() { - // TODO support other symbol types - for (Symbol *sym : symtab->getSymbols()) - if (isa<Defined>(sym)) - symbols.push_back({sym, stringTableSection.addString(sym->getName())}); + auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) { + uint32_t strx = stringTableSection.addString(sym->getName()); + symbols.push_back({sym, strx}); + }; + + // Local symbols aren't in the SymbolTable, so we walk the list of object + // files to gather them. + for (InputFile *file : inputFiles) { + if (auto *objFile = dyn_cast<ObjFile>(file)) { + for (Symbol *sym : objFile->symbols) { + // TODO: when we implement -dead_strip, we should filter out symbols + // that belong to dead sections. + if (auto *defined = dyn_cast<Defined>(sym)) { + if (!defined->isExternal()) + addSymbol(localSymbols, sym); + } + } + } + } + + // __dyld_private is a local symbol too. It's linker-created and doesn't + // exist in any object file. 
+ if (Defined* dyldPrivate = in.stubHelper->dyldPrivate) + addSymbol(localSymbols, dyldPrivate); + + for (Symbol *sym : symtab->getSymbols()) { + if (auto *defined = dyn_cast<Defined>(sym)) { + assert(defined->isExternal()); + (void)defined; + addSymbol(externalSymbols, sym); + } else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { + if (dysym->isReferenced()) + addSymbol(undefinedSymbols, sym); + } + } + + emitStabs(); + uint32_t symtabIndex = stabs.size(); + for (const SymtabEntry &entry : + concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) { + entry.sym->symtabIndex = symtabIndex++; + } +} + +uint32_t SymtabSection::getNumSymbols() const { + return stabs.size() + localSymbols.size() + externalSymbols.size() + + undefinedSymbols.size(); } void SymtabSection::writeTo(uint8_t *buf) const { auto *nList = reinterpret_cast<structs::nlist_64 *>(buf); - for (const SymtabEntry &entry : symbols) { + // Emit the stabs entries before the "real" symbols. We cannot emit them + // after as that would render Symbol::symtabIndex inaccurate. + for (const StabsEntry &entry : stabs) { + nList->n_strx = entry.strx; + nList->n_type = entry.type; + nList->n_sect = entry.sect; + nList->n_desc = entry.desc; + nList->n_value = entry.value; + ++nList; + } + + for (const SymtabEntry &entry : concat<const SymtabEntry>( + localSymbols, externalSymbols, undefinedSymbols)) { nList->n_strx = entry.strx; - // TODO support other symbol types - // TODO populate n_desc + // TODO populate n_desc with more flags if (auto *defined = dyn_cast<Defined>(entry.sym)) { - nList->n_type = MachO::N_EXT | MachO::N_SECT; - nList->n_sect = defined->isec->parent->index; - // For the N_SECT symbol type, n_value is the address of the symbol - nList->n_value = defined->value + defined->isec->getVA(); + uint8_t scope = 0; + if (defined->privateExtern) { + // Private external -- dylib scoped symbol. + // Promote to non-external at link time. 
+ assert(defined->isExternal() && "invalid input file"); + scope = MachO::N_PEXT; + } else if (defined->isExternal()) { + // Normal global symbol. + scope = MachO::N_EXT; + } else { + // TU-local symbol from localSymbols. + scope = 0; + } + + if (defined->isAbsolute()) { + nList->n_type = scope | MachO::N_ABS; + nList->n_sect = MachO::NO_SECT; + nList->n_value = defined->value; + } else { + nList->n_type = scope | MachO::N_SECT; + nList->n_sect = defined->isec->parent->index; + // For the N_SECT symbol type, n_value is the address of the symbol + nList->n_value = defined->getVA(); + } + nList->n_desc |= defined->isExternalWeakDef() ? MachO::N_WEAK_DEF : 0; + } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) { + uint16_t n_desc = nList->n_desc; + MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->file->ordinal); + nList->n_type = MachO::N_EXT; + n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0; + nList->n_desc = n_desc; } ++nList; } } +IndirectSymtabSection::IndirectSymtabSection() + : LinkEditSection(segment_names::linkEdit, + section_names::indirectSymbolTable) {} + +uint32_t IndirectSymtabSection::getNumSymbols() const { + return in.got->getEntries().size() + in.tlvPointers->getEntries().size() + + in.stubs->getEntries().size(); +} + +bool IndirectSymtabSection::isNeeded() const { + return in.got->isNeeded() || in.tlvPointers->isNeeded() || + in.stubs->isNeeded(); +} + +void IndirectSymtabSection::finalizeContents() { + uint32_t off = 0; + in.got->reserved1 = off; + off += in.got->getEntries().size(); + in.tlvPointers->reserved1 = off; + off += in.tlvPointers->getEntries().size(); + // There is a 1:1 correspondence between stubs and LazyPointerSection + // entries, so they can share the same sub-array in the table. 
+ in.stubs->reserved1 = in.lazyPointers->reserved1 = off; +} + +void IndirectSymtabSection::writeTo(uint8_t *buf) const { + uint32_t off = 0; + for (const Symbol *sym : in.got->getEntries()) { + write32le(buf + off * sizeof(uint32_t), sym->symtabIndex); + ++off; + } + for (const Symbol *sym : in.tlvPointers->getEntries()) { + write32le(buf + off * sizeof(uint32_t), sym->symtabIndex); + ++off; + } + for (const Symbol *sym : in.stubs->getEntries()) { + write32le(buf + off * sizeof(uint32_t), sym->symtabIndex); + ++off; + } +} + StringTableSection::StringTableSection() - : SyntheticSection(segment_names::linkEdit, section_names::stringTable) {} + : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {} uint32_t StringTableSection::addString(StringRef str) { uint32_t strx = size; - strings.push_back(str); + strings.push_back(str); // TODO: consider deduplicating strings size += str.size() + 1; // account for null terminator return strx; } diff --git a/contrib/llvm-project/lld/MachO/SyntheticSections.h b/contrib/llvm-project/lld/MachO/SyntheticSections.h index a8fbf6c8a265..7bca28de1386 100644 --- a/contrib/llvm-project/lld/MachO/SyntheticSections.h +++ b/contrib/llvm-project/lld/MachO/SyntheticSections.h @@ -13,29 +13,46 @@ #include "ExportTrie.h" #include "InputSection.h" #include "OutputSection.h" +#include "OutputSegment.h" #include "Target.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/raw_ostream.h" +namespace llvm { +class DWARFUnit; +} // namespace llvm + namespace lld { namespace macho { namespace section_names { constexpr const char pageZero[] = "__pagezero"; +constexpr const char common[] = "__common"; constexpr const char header[] = "__mach_header"; +constexpr const char rebase[] = "__rebase"; constexpr const char binding[] = "__binding"; +constexpr const char weakBinding[] = "__weak_binding"; constexpr const char lazyBinding[] = "__lazy_binding"; constexpr const char export_[] = "__export"; 
constexpr const char symbolTable[] = "__symbol_table"; +constexpr const char indirectSymbolTable[] = "__ind_sym_tab"; constexpr const char stringTable[] = "__string_table"; constexpr const char got[] = "__got"; +constexpr const char threadPtrs[] = "__thread_ptrs"; +constexpr const char unwindInfo[] = "__unwind_info"; +// these are not synthetic, but in service of synthetic __unwind_info +constexpr const char compactUnwind[] = "__compact_unwind"; +constexpr const char ehFrame[] = "__eh_frame"; } // namespace section_names +class Defined; class DylibSymbol; class LoadCommand; +class ObjFile; class SyntheticSection : public OutputSection { public: @@ -49,6 +66,32 @@ public: const StringRef segname; }; +// All sections in __LINKEDIT should inherit from this. +class LinkEditSection : public SyntheticSection { +public: + LinkEditSection(const char *segname, const char *name) + : SyntheticSection(segname, name) { + align = WordSize; + } + + // Sections in __LINKEDIT are special: their offsets are recorded in the + // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section + // headers. + bool isHidden() const override final { return true; } + + virtual uint64_t getRawSize() const = 0; + + // codesign (or more specifically libstuff) checks that each section in + // __LINKEDIT ends where the next one starts -- no gaps are permitted. We + // therefore align every section's start and end points to WordSize. + // + // NOTE: This assumes that the extra bytes required for alignment can be + // zero-valued bytes. + uint64_t getSize() const override final { + return llvm::alignTo(getRawSize(), WordSize); + } +}; + // The header of the Mach-O file, which must have a file offset of zero. class MachHeaderSection : public SyntheticSection { public: @@ -74,11 +117,13 @@ public: void writeTo(uint8_t *buf) const override {} }; -// This section will be populated by dyld with addresses to non-lazily-loaded -// dylib symbols. 
-class GotSection : public SyntheticSection { +// This is the base class for the GOT and TLVPointer sections, which are nearly +// functionally identical -- they will both be populated by dyld with addresses +// to non-lazily-loaded dylib symbols. The main difference is that the +// TLVPointerSection stores references to thread-local variables. +class NonLazyPointerSectionBase : public SyntheticSection { public: - GotSection(); + NonLazyPointerSectionBase(const char *segname, const char *name); const llvm::SetVector<const Symbol *> &getEntries() const { return entries; } @@ -88,38 +133,82 @@ public: void writeTo(uint8_t *buf) const override; - void addEntry(Symbol &sym); + void addEntry(Symbol *sym); private: llvm::SetVector<const Symbol *> entries; }; +class GotSection : public NonLazyPointerSectionBase { +public: + GotSection() + : NonLazyPointerSectionBase(segment_names::dataConst, + section_names::got) { + // TODO: section_64::reserved1 should be an index into the indirect symbol + // table, which we do not currently emit + } +}; + +class TlvPointerSection : public NonLazyPointerSectionBase { +public: + TlvPointerSection() + : NonLazyPointerSectionBase(segment_names::data, + section_names::threadPtrs) {} +}; + +using SectionPointerUnion = + llvm::PointerUnion<const InputSection *, const OutputSection *>; + +struct Location { + SectionPointerUnion section = nullptr; + uint64_t offset = 0; + + Location(SectionPointerUnion section, uint64_t offset) + : section(section), offset(offset) {} + uint64_t getVA() const; +}; + +// Stores rebase opcodes, which tell dyld where absolute addresses have been +// encoded in the binary. If the binary is not loaded at its preferred address, +// dyld has to rebase these addresses by adding an offset to them. 
+class RebaseSection : public LinkEditSection { +public: + RebaseSection(); + void finalizeContents(); + uint64_t getRawSize() const override { return contents.size(); } + bool isNeeded() const override { return !locations.empty(); } + void writeTo(uint8_t *buf) const override; + + void addEntry(SectionPointerUnion section, uint64_t offset) { + if (config->isPic) + locations.push_back({section, offset}); + } + +private: + std::vector<Location> locations; + SmallVector<char, 128> contents; +}; + struct BindingEntry { const DylibSymbol *dysym; - const InputSection *isec; - uint64_t offset; int64_t addend; - BindingEntry(const DylibSymbol *dysym, const InputSection *isec, - uint64_t offset, int64_t addend) - : dysym(dysym), isec(isec), offset(offset), addend(addend) {} + Location target; + BindingEntry(const DylibSymbol *dysym, int64_t addend, Location target) + : dysym(dysym), addend(addend), target(std::move(target)) {} }; // Stores bind opcodes for telling dyld which symbols to load non-lazily. -class BindingSection : public SyntheticSection { +class BindingSection : public LinkEditSection { public: BindingSection(); void finalizeContents(); - uint64_t getSize() const override { return contents.size(); } - // Like other sections in __LINKEDIT, the binding section is special: its - // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in - // section headers. 
- bool isHidden() const override { return true; } - bool isNeeded() const override; + uint64_t getRawSize() const override { return contents.size(); } + bool isNeeded() const override { return !bindings.empty(); } void writeTo(uint8_t *buf) const override; - void addEntry(const DylibSymbol *dysym, const InputSection *isec, - uint64_t offset, int64_t addend) { - bindings.emplace_back(dysym, isec, offset, addend); + void addEntry(const DylibSymbol *dysym, SectionPointerUnion section, + uint64_t offset, int64_t addend = 0) { + bindings.emplace_back(dysym, addend, Location(section, offset)); } private: @@ -127,16 +216,73 @@ private: SmallVector<char, 128> contents; }; +struct WeakBindingEntry { + const Symbol *symbol; + int64_t addend; + Location target; + WeakBindingEntry(const Symbol *symbol, int64_t addend, Location target) + : symbol(symbol), addend(addend), target(std::move(target)) {} +}; + +// Stores bind opcodes for telling dyld which weak symbols need coalescing. +// There are two types of entries in this section: +// +// 1) Non-weak definitions: This is a symbol definition that weak symbols in +// other dylibs should coalesce to. +// +// 2) Weak bindings: These tell dyld that a given symbol reference should +// coalesce to a non-weak definition if one is found. Note that unlike in the +// entries in the BindingSection, the bindings here only refer to these +// symbols by name, but do not specify which dylib to load them from. 
+class WeakBindingSection : public LinkEditSection { +public: + WeakBindingSection(); + void finalizeContents(); + uint64_t getRawSize() const override { return contents.size(); } + bool isNeeded() const override { + return !bindings.empty() || !definitions.empty(); + } + + void writeTo(uint8_t *buf) const override; + + void addEntry(const Symbol *symbol, SectionPointerUnion section, + uint64_t offset, int64_t addend = 0) { + bindings.emplace_back(symbol, addend, Location(section, offset)); + } + + bool hasEntry() const { return !bindings.empty(); } + + void addNonWeakDefinition(const Defined *defined) { + definitions.emplace_back(defined); + } + + bool hasNonWeakDefinition() const { return !definitions.empty(); } + +private: + std::vector<WeakBindingEntry> bindings; + std::vector<const Defined *> definitions; + SmallVector<char, 128> contents; +}; + +// Whether a given symbol's address can only be resolved at runtime. +bool needsBinding(const Symbol *); + +// Add bindings for symbols that need weak or non-lazy bindings. +void addNonLazyBindingEntries(const Symbol *, SectionPointerUnion, + uint64_t offset, int64_t addend = 0); + // The following sections implement lazy symbol binding -- very similar to the // PLT mechanism in ELF. // -// ELF's .plt section is broken up into two sections in Mach-O: StubsSection and -// StubHelperSection. Calls to functions in dylibs will end up calling into +// ELF's .plt section is broken up into two sections in Mach-O: StubsSection +// and StubHelperSection. Calls to functions in dylibs will end up calling into // StubsSection, which contains indirect jumps to addresses stored in the // LazyPointerSection (the counterpart to ELF's .plt.got). // -// Initially, the LazyPointerSection contains addresses that point into one of -// the entry points in the middle of the StubHelperSection. The code in +// We will first describe how non-weak symbols are handled. 
+// +// At program start, the LazyPointerSection contains addresses that point into +// one of the entry points in the middle of the StubHelperSection. The code in // StubHelperSection will push on the stack an offset into the // LazyBindingSection. The push is followed by a jump to the beginning of the // StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder. @@ -144,10 +290,17 @@ private: // the GOT. // // The stub binder will look up the bind opcodes in the LazyBindingSection at -// the given offset. The bind opcodes will tell the binder to update the address -// in the LazyPointerSection to point to the symbol, so that subsequent calls -// don't have to redo the symbol resolution. The binder will then jump to the -// resolved symbol. +// the given offset. The bind opcodes will tell the binder to update the +// address in the LazyPointerSection to point to the symbol, so that subsequent +// calls don't have to redo the symbol resolution. The binder will then jump to +// the resolved symbol. +// +// With weak symbols, the situation is slightly different. Since there is no +// "weak lazy" lookup, function calls to weak symbols are always non-lazily +// bound. We emit both regular non-lazy bindings as well as weak bindings, in +// order that the weak bindings may overwrite the non-lazy bindings if an +// appropriate symbol is found at runtime. However, the bound addresses will +// still be written (non-lazily) into the LazyPointerSection. class StubsSection : public SyntheticSection { public: @@ -155,13 +308,13 @@ public: uint64_t getSize() const override; bool isNeeded() const override { return !entries.empty(); } void writeTo(uint8_t *buf) const override; - - const llvm::SetVector<DylibSymbol *> &getEntries() const { return entries; } - - void addEntry(DylibSymbol &sym); + const llvm::SetVector<Symbol *> &getEntries() const { return entries; } + // Returns whether the symbol was added. 
Note that every stubs entry will + // have a corresponding entry in the LazyPointerSection. + bool addEntry(Symbol *); private: - llvm::SetVector<DylibSymbol *> entries; + llvm::SetVector<Symbol *> entries; }; class StubHelperSection : public SyntheticSection { @@ -174,6 +327,7 @@ public: void setup(); DylibSymbol *stubBinder = nullptr; + Defined *dyldPrivate = nullptr; }; // This section contains space for just a single word, and will be used by dyld @@ -186,6 +340,8 @@ public: uint64_t getSize() const override { return WordSize; } }; +// Note that this section may also be targeted by non-lazy bindings. In +// particular, this happens when branch relocations target weak symbols. class LazyPointerSection : public SyntheticSection { public: LazyPointerSection(); @@ -194,60 +350,59 @@ public: void writeTo(uint8_t *buf) const override; }; -class LazyBindingSection : public SyntheticSection { +class LazyBindingSection : public LinkEditSection { public: LazyBindingSection(); void finalizeContents(); - uint64_t getSize() const override { return contents.size(); } - uint32_t encode(const DylibSymbol &); - // Like other sections in __LINKEDIT, the lazy binding section is special: its - // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in - // section headers. - bool isHidden() const override { return true; } - bool isNeeded() const override; + uint64_t getRawSize() const override { return contents.size(); } + bool isNeeded() const override { return !entries.empty(); } void writeTo(uint8_t *buf) const override; + // Note that every entry here will be referenced by a corresponding entry in + // the StubHelperSection. + void addEntry(DylibSymbol *dysym); + const llvm::SetVector<DylibSymbol *> &getEntries() const { return entries; } private: + uint32_t encode(const DylibSymbol &); + + llvm::SetVector<DylibSymbol *> entries; SmallVector<char, 128> contents; llvm::raw_svector_ostream os{contents}; }; +// Adds stubs and bindings where necessary (e.g. 
if the symbol is a +// DylibSymbol.) +void prepareBranchTarget(Symbol *); + // Stores a trie that describes the set of exported symbols. -class ExportSection : public SyntheticSection { +class ExportSection : public LinkEditSection { public: ExportSection(); void finalizeContents(); - uint64_t getSize() const override { return size; } - // Like other sections in __LINKEDIT, the export section is special: its - // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in - // section headers. - bool isHidden() const override { return true; } + uint64_t getRawSize() const override { return size; } void writeTo(uint8_t *buf) const override; + bool hasWeakSymbol = false; + private: TrieBuilder trieBuilder; size_t size = 0; }; // Stores the strings referenced by the symbol table. -class StringTableSection : public SyntheticSection { +class StringTableSection : public LinkEditSection { public: StringTableSection(); // Returns the start offset of the added string. uint32_t addString(StringRef); - uint64_t getSize() const override { return size; } - // Like other sections in __LINKEDIT, the string table section is special: its - // offsets are recorded in the LC_SYMTAB load command, instead of in section - // headers. - bool isHidden() const override { return true; } + uint64_t getRawSize() const override { return size; } void writeTo(uint8_t *buf) const override; private: - // An n_strx value of 0 always indicates the empty string, so we must locate - // our non-empty string values at positive offsets in the string table. - // Therefore we insert a dummy value at position zero. - std::vector<StringRef> strings{"\0"}; - size_t size = 1; + // ld64 emits string tables which start with a space and a zero byte. We + // match its behavior here since some tools depend on it. 
+ std::vector<StringRef> strings{" "}; + size_t size = 2; }; struct SymtabEntry { @@ -255,26 +410,81 @@ struct SymtabEntry { size_t strx; }; -class SymtabSection : public SyntheticSection { +struct StabsEntry { + uint8_t type = 0; + uint32_t strx = 0; + uint8_t sect = 0; + uint16_t desc = 0; + uint64_t value = 0; + + StabsEntry() = default; + explicit StabsEntry(uint8_t type) : type(type) {} +}; + +// Symbols of the same type must be laid out contiguously: we choose to emit +// all local symbols first, then external symbols, and finally undefined +// symbols. For each symbol type, the LC_DYSYMTAB load command will record the +// range (start index and total number) of those symbols in the symbol table. +class SymtabSection : public LinkEditSection { public: SymtabSection(StringTableSection &); void finalizeContents(); - size_t getNumSymbols() const { return symbols.size(); } - uint64_t getSize() const override; - // Like other sections in __LINKEDIT, the symtab section is special: its - // offsets are recorded in the LC_SYMTAB load command, instead of in section - // headers. - bool isHidden() const override { return true; } + uint32_t getNumSymbols() const; + uint32_t getNumLocalSymbols() const { + return stabs.size() + localSymbols.size(); + } + uint32_t getNumExternalSymbols() const { return externalSymbols.size(); } + uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); } + uint64_t getRawSize() const override; void writeTo(uint8_t *buf) const override; private: + void emitBeginSourceStab(llvm::DWARFUnit *compileUnit); + void emitEndSourceStab(); + void emitObjectFileStab(ObjFile *); + void emitEndFunStab(Defined *); + void emitStabs(); + StringTableSection &stringTableSection; - std::vector<SymtabEntry> symbols; + // STABS symbols are always local symbols, but we represent them with special + // entries because they may use fields like n_sect and n_desc differently. 
+ std::vector<StabsEntry> stabs; + std::vector<SymtabEntry> localSymbols; + std::vector<SymtabEntry> externalSymbols; + std::vector<SymtabEntry> undefinedSymbols; +}; + +// The indirect symbol table is a list of 32-bit integers that serve as indices +// into the (actual) symbol table. The indirect symbol table is a +// concatenation of several sub-arrays of indices, each sub-array belonging to +// a separate section. The starting offset of each sub-array is stored in the +// reserved1 header field of the respective section. +// +// These sub-arrays provide symbol information for sections that store +// contiguous sequences of symbol references. These references can be pointers +// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g. +// function stubs). +class IndirectSymtabSection : public LinkEditSection { +public: + IndirectSymtabSection(); + void finalizeContents(); + uint32_t getNumSymbols() const; + uint64_t getRawSize() const override { + return getNumSymbols() * sizeof(uint32_t); + } + bool isNeeded() const override; + void writeTo(uint8_t *buf) const override; }; struct InStruct { + MachHeaderSection *header = nullptr; + RebaseSection *rebase = nullptr; BindingSection *binding = nullptr; + WeakBindingSection *weakBinding = nullptr; + LazyBindingSection *lazyBinding = nullptr; + ExportSection *exports = nullptr; GotSection *got = nullptr; + TlvPointerSection *tlvPointers = nullptr; LazyPointerSection *lazyPointers = nullptr; StubsSection *stubs = nullptr; StubHelperSection *stubHelper = nullptr; diff --git a/contrib/llvm-project/lld/MachO/Target.h b/contrib/llvm-project/lld/MachO/Target.h index 8ea1bde12307..8537803160e6 100644 --- a/contrib/llvm-project/lld/MachO/Target.h +++ b/contrib/llvm-project/lld/MachO/Target.h @@ -37,25 +37,29 @@ public: virtual ~TargetInfo() = default; // Validate the relocation structure and get its addend. 
- virtual uint64_t - getImplicitAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &, - const llvm::MachO::relocation_info &) const = 0; + virtual uint64_t getAddend(llvm::MemoryBufferRef, + const llvm::MachO::section_64 &, + llvm::MachO::relocation_info, + llvm::MachO::relocation_info) const = 0; + virtual bool isPairedReloc(llvm::MachO::relocation_info) const = 0; virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const = 0; // Write code for lazy binding. See the comments on StubsSection for more // details. - virtual void writeStub(uint8_t *buf, const DylibSymbol &) const = 0; + virtual void writeStub(uint8_t *buf, const Symbol &) const = 0; virtual void writeStubHelperHeader(uint8_t *buf) const = 0; virtual void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, uint64_t entryAddr) const = 0; // Symbols may be referenced via either the GOT or the stubs section, // depending on the relocation type. prepareSymbolRelocation() will set up the - // GOT/stubs entries, and getSymbolVA() will return the addresses of those - // entries. - virtual void prepareSymbolRelocation(Symbol &, const InputSection *, + // GOT/stubs entries, and resolveSymbolVA() will return the addresses of those + // entries. resolveSymbolVA() may also relax the target instructions to save + // on a level of address indirection. 
+ virtual void prepareSymbolRelocation(Symbol *, const InputSection *, const Reloc &) = 0; - virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0; + virtual uint64_t resolveSymbolVA(uint8_t *buf, const Symbol &, + uint8_t type) const = 0; uint32_t cpuType; uint32_t cpuSubtype; diff --git a/contrib/llvm-project/lld/MachO/UnwindInfoSection.cpp b/contrib/llvm-project/lld/MachO/UnwindInfoSection.cpp new file mode 100644 index 000000000000..afaa69dac8dc --- /dev/null +++ b/contrib/llvm-project/lld/MachO/UnwindInfoSection.cpp @@ -0,0 +1,340 @@ +//===- UnwindInfoSection.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UnwindInfoSection.h" +#include "Config.h" +#include "InputSection.h" +#include "MergedOutputSection.h" +#include "OutputSection.h" +#include "OutputSegment.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Target.h" + +#include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/MachO.h" + +using namespace llvm; +using namespace llvm::MachO; +using namespace lld; +using namespace lld::macho; + +#define COMMON_ENCODINGS_MAX 127 +#define COMPACT_ENCODINGS_MAX 256 + +#define SECOND_LEVEL_PAGE_BYTES 4096 +#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t)) +#define REGULAR_SECOND_LEVEL_ENTRIES_MAX \ + ((SECOND_LEVEL_PAGE_BYTES - \ + sizeof(unwind_info_regular_second_level_page_header)) / \ + sizeof(unwind_info_regular_second_level_entry)) +#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX \ + ((SECOND_LEVEL_PAGE_BYTES - \ + sizeof(unwind_info_compressed_second_level_page_header)) / \ + sizeof(uint32_t)) + +#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24 +#define 
COMPRESSED_ENTRY_FUNC_OFFSET_MASK \ + UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0) + +// Compact Unwind format is a Mach-O evolution of DWARF Unwind that +// optimizes space and exception-time lookup. Most DWARF unwind +// entries can be replaced with Compact Unwind entries, but the ones +// that cannot are retained in DWARF form. +// +// This comment will address macro-level organization of the pre-link +// and post-link compact unwind tables. For micro-level organization +// pertaining to the bitfield layout of the 32-bit compact unwind +// entries, see libunwind/include/mach-o/compact_unwind_encoding.h +// +// Important clarifying factoids: +// +// * __LD,__compact_unwind is the compact unwind format for compiler +// output and linker input. It is never a final output. It could be +// an intermediate output with the `-r` option which retains relocs. +// +// * __TEXT,__unwind_info is the compact unwind format for final +// linker output. It is never an input. +// +// * __TEXT,__eh_frame is the DWARF format for both linker input and output. +// +// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd +// level) by ascending address, and the pages are referenced by an +// index (1st level) in the section header. +// +// * Following the headers in __TEXT,__unwind_info, the bulk of the +// section contains a vector of compact unwind entries +// `{functionOffset, encoding}` sorted by ascending `functionOffset`. +// Adjacent entries with the same encoding can be folded to great +// advantage, achieving a 3-order-of-magnitude reduction in the +// number of entries. +// +// * The __TEXT,__unwind_info format can accommodate up to 127 unique +// encodings for the space-efficient compressed format. In practice, +// fewer than a dozen unique encodings are used by C++ programs of +// all sizes. Therefore, we don't even bother implementing the regular +// non-compressed format. Time will tell if anyone in the field ever +// overflows the 127-encodings limit. 
+ +// TODO(gkm): prune __eh_frame entries superseded by __unwind_info +// TODO(gkm): how do we align the 2nd-level pages? + +UnwindInfoSection::UnwindInfoSection() + : SyntheticSection(segment_names::text, section_names::unwindInfo) { + align = WordSize; // TODO(gkm): make this 4 KiB ? +} + +bool UnwindInfoSection::isNeeded() const { + return (compactUnwindSection != nullptr); +} + +// Scan the __LD,__compact_unwind entries and compute the space needs of +// __TEXT,__unwind_info and __TEXT,__eh_frame + +void UnwindInfoSection::finalize() { + if (compactUnwindSection == nullptr) + return; + + // At this point, the address space for __TEXT,__text has been + // assigned, so we can relocate the __LD,__compact_unwind entries + // into a temporary buffer. Relocation is necessary in order to sort + // the CU entries by function address. Sorting is necessary so that + // we can fold adjacent CU entries with identical + // encoding+personality+lsda. Folding is necessary because it reduces + // the number of CU entries by as much as 3 orders of magnitude! + compactUnwindSection->finalize(); + assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry64) == 0); + size_t cuCount = + compactUnwindSection->getSize() / sizeof(CompactUnwindEntry64); + cuVector.resize(cuCount); + // Relocate all __LD,__compact_unwind entries + compactUnwindSection->writeTo(reinterpret_cast<uint8_t *>(cuVector.data())); + + // Rather than sort & fold the 32-byte entries directly, we create a + // vector of pointers to entries and sort & fold that instead. 
+ cuPtrVector.reserve(cuCount); + for (const CompactUnwindEntry64 &cuEntry : cuVector) + cuPtrVector.emplace_back(&cuEntry); + std::sort(cuPtrVector.begin(), cuPtrVector.end(), + [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) { + return a->functionAddress < b->functionAddress; + }); + + // Fold adjacent entries with matching encoding+personality+lsda + // We use three iterators on the same cuPtrVector to fold in-situ: + // (1) `foldBegin` is the first of a potential sequence of matching entries + // (2) `foldEnd` is the first non-matching entry after `foldBegin`. + // The semi-open interval [ foldBegin .. foldEnd ) contains a range of + // entries that can be folded into a single entry and written to ... + // (3) `foldWrite` + auto foldWrite = cuPtrVector.begin(); + for (auto foldBegin = cuPtrVector.begin(); foldBegin < cuPtrVector.end();) { + auto foldEnd = foldBegin; + while (++foldEnd < cuPtrVector.end() && + (*foldBegin)->encoding == (*foldEnd)->encoding && + (*foldBegin)->personality == (*foldEnd)->personality && + (*foldBegin)->lsda == (*foldEnd)->lsda) + ; + *foldWrite++ = *foldBegin; + foldBegin = foldEnd; + } + cuPtrVector.erase(foldWrite, cuPtrVector.end()); + + // Count frequencies of the folded encodings + EncodingMap encodingFrequencies; + for (auto cuPtrEntry : cuPtrVector) + encodingFrequencies[cuPtrEntry->encoding]++; + + // Make a vector of encodings, sorted by descending frequency + for (const auto &frequency : encodingFrequencies) + commonEncodings.emplace_back(frequency); + std::sort(commonEncodings.begin(), commonEncodings.end(), + [](const std::pair<compact_unwind_encoding_t, size_t> &a, + const std::pair<compact_unwind_encoding_t, size_t> &b) { + if (a.second == b.second) + // When frequencies match, secondarily sort on encoding + // to maintain parity with validate-unwind-info.py + return a.first > b.first; + return a.second > b.second; + }); + + // Truncate the vector to 127 elements. 
+ // Common encoding indexes are limited to 0..126, while encoding + // indexes 127..255 are local to each second-level page + if (commonEncodings.size() > COMMON_ENCODINGS_MAX) + commonEncodings.resize(COMMON_ENCODINGS_MAX); + + // Create a map from encoding to common-encoding-table index + for (size_t i = 0; i < commonEncodings.size(); i++) + commonEncodingIndexes[commonEncodings[i].first] = i; + + // Split folded encodings into pages, where each page is limited by ... + // (a) 4 KiB capacity + // (b) 24-bit difference between first & final function address + // (c) 8-bit compact-encoding-table index, + // for which 0..126 references the global common-encodings table, + // and 127..255 references a local per-second-level-page table. + // First we try the compact format and determine how many entries fit. + // If more entries fit in the regular format, we use that. + for (size_t i = 0; i < cuPtrVector.size();) { + secondLevelPages.emplace_back(); + auto &page = secondLevelPages.back(); + page.entryIndex = i; + uintptr_t functionAddressMax = + cuPtrVector[i]->functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK; + size_t n = commonEncodings.size(); + size_t wordsRemaining = + SECOND_LEVEL_PAGE_WORDS - + sizeof(unwind_info_compressed_second_level_page_header) / + sizeof(uint32_t); + while (wordsRemaining >= 1 && i < cuPtrVector.size()) { + const auto *cuPtr = cuPtrVector[i]; + if (cuPtr->functionAddress >= functionAddressMax) { + break; + } else if (commonEncodingIndexes.count(cuPtr->encoding) || + page.localEncodingIndexes.count(cuPtr->encoding)) { + i++; + wordsRemaining--; + } else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) { + page.localEncodings.emplace_back(cuPtr->encoding); + page.localEncodingIndexes[cuPtr->encoding] = n++; + i++; + wordsRemaining -= 2; + } else { + break; + } + } + page.entryCount = i - page.entryIndex; + + // If this is not the final page, see if it's possible to fit more + // entries by using the regular format. 
This can happen when there + // are many unique encodings, and we saturated the local + // encoding table early. + if (i < cuPtrVector.size() && + page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) { + page.kind = UNWIND_SECOND_LEVEL_REGULAR; + page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX, + cuPtrVector.size() - page.entryIndex); + i = page.entryIndex + page.entryCount; + } else { + page.kind = UNWIND_SECOND_LEVEL_COMPRESSED; + } + } + + // compute size of __TEXT,__unwind_info section + level2PagesOffset = + sizeof(unwind_info_section_header) + + commonEncodings.size() * sizeof(uint32_t) + + personalities.size() * sizeof(uint32_t) + + // The extra second-level-page entry is for the sentinel + (secondLevelPages.size() + 1) * + sizeof(unwind_info_section_header_index_entry) + + lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry); + unwindInfoSize = + level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES; +} + +// All inputs are relocated and output addresses are known, so write! 
+ +void UnwindInfoSection::writeTo(uint8_t *buf) const { + // section header + auto *uip = reinterpret_cast<unwind_info_section_header *>(buf); + uip->version = 1; + uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header); + uip->commonEncodingsArrayCount = commonEncodings.size(); + uip->personalityArraySectionOffset = + uip->commonEncodingsArraySectionOffset + + (uip->commonEncodingsArrayCount * sizeof(uint32_t)); + uip->personalityArrayCount = personalities.size(); + uip->indexSectionOffset = uip->personalityArraySectionOffset + + (uip->personalityArrayCount * sizeof(uint32_t)); + uip->indexCount = secondLevelPages.size() + 1; + + // Common encodings + auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]); + for (const auto &encoding : commonEncodings) + *i32p++ = encoding.first; + + // Personalities + for (const uint32_t &personality : personalities) + *i32p++ = personality; + + // Level-1 index + uint32_t lsdaOffset = + uip->indexSectionOffset + + uip->indexCount * sizeof(unwind_info_section_header_index_entry); + uint64_t l2PagesOffset = level2PagesOffset; + auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p); + for (const SecondLevelPage &page : secondLevelPages) { + iep->functionOffset = cuPtrVector[page.entryIndex]->functionAddress; + iep->secondLevelPagesSectionOffset = l2PagesOffset; + iep->lsdaIndexArraySectionOffset = lsdaOffset; + iep++; + l2PagesOffset += SECOND_LEVEL_PAGE_BYTES; + } + // Level-1 sentinel + const CompactUnwindEntry64 &cuEnd = cuVector.back(); + iep->functionOffset = cuEnd.functionAddress + cuEnd.functionLength; + iep->secondLevelPagesSectionOffset = 0; + iep->lsdaIndexArraySectionOffset = lsdaOffset; + iep++; + + // LSDAs + auto *lep = + reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep); + for (const unwind_info_section_header_lsda_index_entry &lsda : lsdaEntries) { + lep->functionOffset = lsda.functionOffset; + lep->lsdaOffset = lsda.lsdaOffset; + } + + // Level-2 pages + 
auto *pp = reinterpret_cast<uint32_t *>(lep); + for (const SecondLevelPage &page : secondLevelPages) { + if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) { + uintptr_t functionAddressBase = + cuPtrVector[page.entryIndex]->functionAddress; + auto *p2p = + reinterpret_cast<unwind_info_compressed_second_level_page_header *>( + pp); + p2p->kind = page.kind; + p2p->entryPageOffset = + sizeof(unwind_info_compressed_second_level_page_header); + p2p->entryCount = page.entryCount; + p2p->encodingsPageOffset = + p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t); + p2p->encodingsCount = page.localEncodings.size(); + auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); + for (size_t i = 0; i < page.entryCount; i++) { + const CompactUnwindEntry64 *cuep = cuPtrVector[page.entryIndex + i]; + auto it = commonEncodingIndexes.find(cuep->encoding); + if (it == commonEncodingIndexes.end()) + it = page.localEncodingIndexes.find(cuep->encoding); + *ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) | + (cuep->functionAddress - functionAddressBase); + } + if (page.localEncodings.size() != 0) + memcpy(ep, page.localEncodings.data(), + page.localEncodings.size() * sizeof(uint32_t)); + } else { + auto *p2p = + reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp); + p2p->kind = page.kind; + p2p->entryPageOffset = + sizeof(unwind_info_regular_second_level_page_header); + p2p->entryCount = page.entryCount; + auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); + for (size_t i = 0; i < page.entryCount; i++) { + const CompactUnwindEntry64 *cuep = cuPtrVector[page.entryIndex + i]; + *ep++ = cuep->functionAddress; + *ep++ = cuep->encoding; + } + } + pp += SECOND_LEVEL_PAGE_WORDS; + } +} diff --git a/contrib/llvm-project/lld/MachO/UnwindInfoSection.h b/contrib/llvm-project/lld/MachO/UnwindInfoSection.h new file mode 100644 index 000000000000..2285cf930d83 --- /dev/null +++ b/contrib/llvm-project/lld/MachO/UnwindInfoSection.h @@ -0,0 +1,79 @@ +//===- UnwindInfoSection.h 
------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_UNWIND_INFO_H +#define LLD_MACHO_UNWIND_INFO_H + +#include "MergedOutputSection.h" +#include "SyntheticSections.h" + +#include "mach-o/compact_unwind_encoding.h" +#include "llvm/ADT/DenseMap.h" + +#include <vector> + +// In 2020, we mostly care about 64-bit targets: x86_64 and arm64 +struct CompactUnwindEntry64 { + uint64_t functionAddress; + uint32_t functionLength; + compact_unwind_encoding_t encoding; + uint64_t personality; + uint64_t lsda; +}; + +// FIXME(gkm): someday we might care about 32-bit targets: x86 & arm +struct CompactUnwindEntry32 { + uint32_t functionAddress; + uint32_t functionLength; + compact_unwind_encoding_t encoding; + uint32_t personality; + uint32_t lsda; +}; + +namespace lld { +namespace macho { + +class UnwindInfoSection : public SyntheticSection { +public: + UnwindInfoSection(); + uint64_t getSize() const override { return unwindInfoSize; } + bool isNeeded() const override; + void finalize() override; + void writeTo(uint8_t *buf) const override; + void setCompactUnwindSection(MergedOutputSection *cuSection) { + compactUnwindSection = cuSection; + } + + using EncodingMap = llvm::DenseMap<compact_unwind_encoding_t, size_t>; + + struct SecondLevelPage { + uint32_t kind; + size_t entryIndex; + size_t entryCount; + size_t byteCount; + std::vector<compact_unwind_encoding_t> localEncodings; + EncodingMap localEncodingIndexes; + }; + +private: + std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings; + EncodingMap commonEncodingIndexes; + std::vector<uint32_t> personalities; + std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries; + 
std::vector<CompactUnwindEntry64> cuVector; + std::vector<const CompactUnwindEntry64 *> cuPtrVector; + std::vector<SecondLevelPage> secondLevelPages; + MergedOutputSection *compactUnwindSection = nullptr; + uint64_t level2PagesOffset = 0; + uint64_t unwindInfoSize = 0; +}; + +} // namespace macho +} // namespace lld + +#endif diff --git a/contrib/llvm-project/lld/MachO/Writer.cpp b/contrib/llvm-project/lld/MachO/Writer.cpp index 03000a7f437e..bdc3609e033d 100644 --- a/contrib/llvm-project/lld/MachO/Writer.cpp +++ b/contrib/llvm-project/lld/MachO/Writer.cpp @@ -17,57 +17,65 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "UnwindInfoSection.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" +#include "llvm/Support/xxhash.h" + +#include <algorithm> using namespace llvm; using namespace llvm::MachO; +using namespace llvm::sys; using namespace lld; using namespace lld::macho; namespace { -class LCLinkEdit; -class LCDyldInfo; -class LCSymtab; +class LCUuid; class Writer { public: Writer() : buffer(errorHandler().outputBuffer) {} void scanRelocations(); + void scanSymbols(); void createOutputSections(); void createLoadCommands(); void assignAddresses(OutputSegment *); - void createSymtabContents(); void openFile(); void writeSections(); + void writeUuid(); void run(); std::unique_ptr<FileOutputBuffer> &buffer; uint64_t addr = 0; uint64_t fileOff = 0; - MachHeaderSection *headerSection = nullptr; - LazyBindingSection *lazyBindingSection = nullptr; - ExportSection *exportSection = nullptr; + MachHeaderSection *header = nullptr; StringTableSection *stringTableSection = nullptr; SymtabSection *symtabSection = nullptr; + IndirectSymtabSection *indirectSymtabSection = nullptr; + UnwindInfoSection *unwindInfoSection = nullptr; + LCUuid *uuidCommand = 
nullptr; }; // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. class LCDyldInfo : public LoadCommand { public: - LCDyldInfo(BindingSection *bindingSection, + LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection, + WeakBindingSection *weakBindingSection, LazyBindingSection *lazyBindingSection, ExportSection *exportSection) - : bindingSection(bindingSection), lazyBindingSection(lazyBindingSection), - exportSection(exportSection) {} + : rebaseSection(rebaseSection), bindingSection(bindingSection), + weakBindingSection(weakBindingSection), + lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} uint32_t getSize() const override { return sizeof(dyld_info_command); } @@ -75,10 +83,18 @@ public: auto *c = reinterpret_cast<dyld_info_command *>(buf); c->cmd = LC_DYLD_INFO_ONLY; c->cmdsize = getSize(); + if (rebaseSection->isNeeded()) { + c->rebase_off = rebaseSection->fileOff; + c->rebase_size = rebaseSection->getFileSize(); + } if (bindingSection->isNeeded()) { c->bind_off = bindingSection->fileOff; c->bind_size = bindingSection->getFileSize(); } + if (weakBindingSection->isNeeded()) { + c->weak_bind_off = weakBindingSection->fileOff; + c->weak_bind_size = weakBindingSection->getFileSize(); + } if (lazyBindingSection->isNeeded()) { c->lazy_bind_off = lazyBindingSection->fileOff; c->lazy_bind_size = lazyBindingSection->getFileSize(); @@ -89,20 +105,39 @@ public: } } + RebaseSection *rebaseSection; BindingSection *bindingSection; + WeakBindingSection *weakBindingSection; LazyBindingSection *lazyBindingSection; ExportSection *exportSection; }; class LCDysymtab : public LoadCommand { public: + LCDysymtab(SymtabSection *symtabSection, + IndirectSymtabSection *indirectSymtabSection) + : symtabSection(symtabSection), + indirectSymtabSection(indirectSymtabSection) {} + uint32_t getSize() const override { return sizeof(dysymtab_command); } void writeTo(uint8_t *buf) const override { auto *c = 
reinterpret_cast<dysymtab_command *>(buf); c->cmd = LC_DYSYMTAB; c->cmdsize = getSize(); + + c->ilocalsym = 0; + c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); + c->nextdefsym = symtabSection->getNumExternalSymbols(); + c->iundefsym = c->iextdefsym + c->nextdefsym; + c->nundefsym = symtabSection->getNumUndefinedSymbols(); + + c->indirectsymoff = indirectSymtabSection->fileOff; + c->nindirectsyms = indirectSymtabSection->getNumSymbols(); } + + SymtabSection *symtabSection; + IndirectSymtabSection *indirectSymtabSection; }; class LCSegment : public LoadCommand { @@ -134,7 +169,11 @@ public: c->nsects = seg->numNonHiddenSections(); for (OutputSection *osec : seg->getSections()) { - c->filesize += osec->getFileSize(); + if (!isZeroFill(osec->flags)) { + assert(osec->fileOff >= seg->fileOff); + c->filesize = std::max( + c->filesize, osec->fileOff + osec->getFileSize() - seg->fileOff); + } if (osec->isHidden()) continue; @@ -150,6 +189,8 @@ public: sectHdr->align = Log2_32(osec->align); sectHdr->flags = osec->flags; sectHdr->size = osec->getSize(); + sectHdr->reserved1 = osec->reserved1; + sectHdr->reserved2 = osec->reserved2; } } @@ -165,7 +206,13 @@ class LCMain : public LoadCommand { auto *c = reinterpret_cast<entry_point_command *>(buf); c->cmd = LC_MAIN; c->cmdsize = getSize(); - c->entryoff = config->entry->getFileOffset(); + + if (config->entry->isInStubs()) + c->entryoff = + in.stubs->fileOff + config->entry->stubsIndex * target->stubSize; + else + c->entryoff = config->entry->getFileOffset(); + c->stacksize = 0; } }; @@ -197,7 +244,12 @@ public: // * LC_REEXPORT_DYLIB class LCDylib : public LoadCommand { public: - LCDylib(LoadCommandType type, StringRef path) : type(type), path(path) {} + LCDylib(LoadCommandType type, StringRef path, + uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0) + : type(type), path(path), compatibilityVersion(compatibilityVersion), + currentVersion(currentVersion) { + instanceCount++; + } uint32_t 
getSize() const override { return alignTo(sizeof(dylib_command) + path.size() + 1, 8); @@ -210,16 +262,26 @@ public: c->cmd = type; c->cmdsize = getSize(); c->dylib.name = sizeof(dylib_command); + c->dylib.timestamp = 0; + c->dylib.compatibility_version = compatibilityVersion; + c->dylib.current_version = currentVersion; memcpy(buf, path.data(), path.size()); buf[path.size()] = '\0'; } + static uint32_t getInstanceCount() { return instanceCount; } + private: LoadCommandType type; StringRef path; + uint32_t compatibilityVersion; + uint32_t currentVersion; + static uint32_t instanceCount; }; +uint32_t LCDylib::instanceCount = 0; + class LCLoadDylinker : public LoadCommand { public: uint32_t getSize() const override { @@ -243,66 +305,209 @@ private: // different location. const StringRef path = "/usr/lib/dyld"; }; + +class LCRPath : public LoadCommand { +public: + LCRPath(StringRef path) : path(path) {} + + uint32_t getSize() const override { + return alignTo(sizeof(rpath_command) + path.size() + 1, WordSize); + } + + void writeTo(uint8_t *buf) const override { + auto *c = reinterpret_cast<rpath_command *>(buf); + buf += sizeof(rpath_command); + + c->cmd = LC_RPATH; + c->cmdsize = getSize(); + c->path = sizeof(rpath_command); + + memcpy(buf, path.data(), path.size()); + buf[path.size()] = '\0'; + } + +private: + StringRef path; +}; + +class LCBuildVersion : public LoadCommand { +public: + LCBuildVersion(const PlatformInfo &platform) : platform(platform) {} + + const int ntools = 1; + + uint32_t getSize() const override { + return sizeof(build_version_command) + ntools * sizeof(build_tool_version); + } + + void writeTo(uint8_t *buf) const override { + auto *c = reinterpret_cast<build_version_command *>(buf); + c->cmd = LC_BUILD_VERSION; + c->cmdsize = getSize(); + c->platform = static_cast<uint32_t>(platform.kind); + c->minos = ((platform.minimum.getMajor() << 020) | + (platform.minimum.getMinor().getValueOr(0) << 010) | + platform.minimum.getSubminor().getValueOr(0)); 
+ c->sdk = ((platform.sdk.getMajor() << 020) | + (platform.sdk.getMinor().getValueOr(0) << 010) | + platform.sdk.getSubminor().getValueOr(0)); + c->ntools = ntools; + auto *t = reinterpret_cast<build_tool_version *>(&c[1]); + t->tool = TOOL_LD; + t->version = (LLVM_VERSION_MAJOR << 020) | (LLVM_VERSION_MINOR << 010) | + LLVM_VERSION_PATCH; + } + + const PlatformInfo &platform; +}; + +// Stores a unique identifier for the output file based on an xxHash64 hash of its +// contents. In order to hash the contents, we must first write them, but +// LC_UUID itself must be part of the written contents in order for all the +// offsets to be calculated correctly. We resolve this circular paradox by +// first writing an LC_UUID with an all-zero UUID, then updating the UUID with +// its real value later. +class LCUuid : public LoadCommand { +public: + uint32_t getSize() const override { return sizeof(uuid_command); } + + void writeTo(uint8_t *buf) const override { + auto *c = reinterpret_cast<uuid_command *>(buf); + c->cmd = LC_UUID; + c->cmdsize = getSize(); + uuidBuf = c->uuid; + } + + void writeUuid(uint64_t digest) const { + // xxhash only gives us 8 bytes, so put some fixed data in the other half. + static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size"); + memcpy(uuidBuf, "LLD\xa1UU1D", 8); + memcpy(uuidBuf + 8, &digest, 8); + + // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in + // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't + // want to lose bits of the digest in byte 8, so swap that with a byte of + // fixed data that happens to have the right bits set. + std::swap(uuidBuf[3], uuidBuf[8]); + + // Claim that this is an MD5-based hash. It isn't, but this signals that + // this is not a time-based and not a random hash. MD5 seems like the least + // bad lie we can put here. 
+ assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3"); + assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2"); + } + + mutable uint8_t *uuidBuf; +}; + } // namespace void Writer::scanRelocations() { for (InputSection *isec : inputSections) { + // We do not wish to add rebase opcodes for __LD,__compact_unwind, because + // it doesn't actually end up in the final binary. TODO: filtering it out + // before Writer runs might be cleaner... + if (isec->segname == segment_names::ld) + continue; + for (Reloc &r : isec->relocs) { - if (auto *s = r.target.dyn_cast<lld::macho::Symbol *>()) { + if (auto *s = r.referent.dyn_cast<lld::macho::Symbol *>()) { if (isa<Undefined>(s)) - error("undefined symbol " + s->getName() + ", referenced from " + - sys::path::filename(isec->file->getName())); + treatUndefinedSymbol(toString(*s), toString(isec->file)); else - target->prepareSymbolRelocation(*s, isec, r); + target->prepareSymbolRelocation(s, isec, r); + } else { + assert(r.referent.is<InputSection *>()); + if (!r.pcrel) + in.rebase->addEntry(isec, r.offset); } } } } +void Writer::scanSymbols() { + for (const macho::Symbol *sym : symtab->getSymbols()) { + if (const auto *defined = dyn_cast<Defined>(sym)) { + if (defined->overridesWeakDef) + in.weakBinding->addNonWeakDefinition(defined); + } else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { + dysym->file->refState = std::max(dysym->file->refState, dysym->refState); + } + } +} + void Writer::createLoadCommands() { - headerSection->addLoadCommand( - make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection)); - headerSection->addLoadCommand( - make<LCSymtab>(symtabSection, stringTableSection)); - headerSection->addLoadCommand(make<LCDysymtab>()); + in.header->addLoadCommand(make<LCDyldInfo>( + in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports)); + in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection)); + in.header->addLoadCommand( + 
make<LCDysymtab>(symtabSection, indirectSymtabSection)); + for (StringRef path : config->runtimePaths) + in.header->addLoadCommand(make<LCRPath>(path)); switch (config->outputType) { case MH_EXECUTE: - headerSection->addLoadCommand(make<LCMain>()); - headerSection->addLoadCommand(make<LCLoadDylinker>()); + in.header->addLoadCommand(make<LCMain>()); + in.header->addLoadCommand(make<LCLoadDylinker>()); break; case MH_DYLIB: - headerSection->addLoadCommand( - make<LCDylib>(LC_ID_DYLIB, config->installName)); + in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName, + config->dylibCompatibilityVersion, + config->dylibCurrentVersion)); + break; + case MH_BUNDLE: break; default: llvm_unreachable("unhandled output file type"); } + in.header->addLoadCommand(make<LCBuildVersion>(config->platform)); + + uuidCommand = make<LCUuid>(); + in.header->addLoadCommand(uuidCommand); + uint8_t segIndex = 0; for (OutputSegment *seg : outputSegments) { - headerSection->addLoadCommand(make<LCSegment>(seg->name, seg)); + in.header->addLoadCommand(make<LCSegment>(seg->name, seg)); seg->index = segIndex++; } uint64_t dylibOrdinal = 1; for (InputFile *file : inputFiles) { if (auto *dylibFile = dyn_cast<DylibFile>(file)) { - headerSection->addLoadCommand( - make<LCDylib>(LC_LOAD_DYLIB, dylibFile->dylibName)); + LoadCommandType lcType = + dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak + ? LC_LOAD_WEAK_DYLIB + : LC_LOAD_DYLIB; + in.header->addLoadCommand(make<LCDylib>(lcType, dylibFile->dylibName, + dylibFile->compatibilityVersion, + dylibFile->currentVersion)); dylibFile->ordinal = dylibOrdinal++; if (dylibFile->reexport) - headerSection->addLoadCommand( + in.header->addLoadCommand( make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->dylibName)); } } + + const uint32_t MACOS_MAXPATHLEN = 1024; + config->headerPad = std::max( + config->headerPad, (config->headerPadMaxInstallNames + ? 
LCDylib::getInstanceCount() * MACOS_MAXPATHLEN + : 0)); } static size_t getSymbolPriority(const SymbolPriorityEntry &entry, - const InputFile &file) { - return std::max(entry.objectFiles.lookup(sys::path::filename(file.getName())), - entry.anyObjectFile); + const InputFile *f) { + // We don't use toString(InputFile *) here because it returns the full path + // for object files, and we only want the basename. + StringRef filename; + if (f->archiveName.empty()) + filename = path::filename(f->getName()); + else + filename = saver.save(path::filename(f->archiveName) + "(" + + path::filename(f->getName()) + ")"); + return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile); } // Each section gets assigned the priority of the highest-priority symbol it @@ -320,12 +525,12 @@ static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() { SymbolPriorityEntry &entry = it->second; size_t &priority = sectionPriorities[sym.isec]; - priority = std::max(priority, getSymbolPriority(entry, *sym.isec->file)); + priority = std::max(priority, getSymbolPriority(entry, sym.isec->file)); }; // TODO: Make sure this handles weak symbols correctly. for (InputFile *file : inputFiles) - if (isa<ObjFile>(file) || isa<ArchiveFile>(file)) + if (isa<ObjFile>(file)) for (lld::macho::Symbol *sym : file->symbols) if (auto *d = dyn_cast<Defined>(sym)) addSym(*d); @@ -347,19 +552,46 @@ static int sectionOrder(OutputSection *osec) { StringRef segname = osec->parent->name; // Sections are uniquely identified by their segment + section name. 
if (segname == segment_names::text) { - if (osec->name == section_names::header) - return -1; - } else if (segname == segment_names::linkEdit) { return StringSwitch<int>(osec->name) - .Case(section_names::binding, -4) - .Case(section_names::export_, -3) - .Case(section_names::symbolTable, -2) + .Case(section_names::header, -1) + .Case(section_names::unwindInfo, std::numeric_limits<int>::max() - 1) + .Case(section_names::ehFrame, std::numeric_limits<int>::max()) + .Default(0); + } + if (segname == segment_names::data) { + // For each thread spawned, dyld will initialize its TLVs by copying the + // address range from the start of the first thread-local data section to + // the end of the last one. We therefore arrange these sections contiguously + // to minimize the amount of memory used. Additionally, since zerofill + // sections must be at the end of their segments, and since TLV data + // sections can be zerofills, we end up putting all TLV data sections at the + // end of the segment. + switch (sectionType(osec->flags)) { + case S_THREAD_LOCAL_REGULAR: + return std::numeric_limits<int>::max() - 2; + case S_THREAD_LOCAL_ZEROFILL: + return std::numeric_limits<int>::max() - 1; + case S_ZEROFILL: + return std::numeric_limits<int>::max(); + default: + return 0; + } + } + if (segname == segment_names::linkEdit) { + return StringSwitch<int>(osec->name) + .Case(section_names::rebase, -8) + .Case(section_names::binding, -7) + .Case(section_names::weakBinding, -6) + .Case(section_names::lazyBinding, -5) + .Case(section_names::export_, -4) + .Case(section_names::symbolTable, -3) + .Case(section_names::indirectSymbolTable, -2) .Case(section_names::stringTable, -1) .Default(0); } // ZeroFill sections must always be the at the end of their segments, // otherwise subsequent sections may get overwritten with zeroes at runtime. 
- if (isZeroFill(osec->flags)) + if (sectionType(osec->flags) == S_ZEROFILL) return std::numeric_limits<int>::max(); return 0; } @@ -388,6 +620,9 @@ static void sortSegmentsAndSections() { if (!osec->isHidden()) osec->index = ++sectionIndex; + if (!firstTLVDataSection && isThreadLocalData(osec->flags)) + firstTLVDataSection = osec; + if (!isecPriorities.empty()) { if (auto *merged = dyn_cast<MergedOutputSection>(osec)) { llvm::stable_sort(merged->inputs, @@ -402,17 +637,17 @@ static void sortSegmentsAndSections() { void Writer::createOutputSections() { // First, create hidden sections - headerSection = make<MachHeaderSection>(); - lazyBindingSection = make<LazyBindingSection>(); stringTableSection = make<StringTableSection>(); + unwindInfoSection = make<UnwindInfoSection>(); // TODO(gkm): only when no -r symtabSection = make<SymtabSection>(*stringTableSection); - exportSection = make<ExportSection>(); + indirectSymtabSection = make<IndirectSymtabSection>(); switch (config->outputType) { case MH_EXECUTE: make<PageZeroSection>(); break; case MH_DYLIB: + case MH_BUNDLE: break; default: llvm_unreachable("unhandled output file type"); @@ -432,7 +667,12 @@ void Writer::createOutputSections() { for (const auto &it : mergedOutputSections) { StringRef segname = it.first.first; MergedOutputSection *osec = it.second; - getOrCreateOutputSegment(segname)->addOutputSection(osec); + if (unwindInfoSection && segname == segment_names::ld) { + assert(osec->name == section_names::compactUnwind); + unwindInfoSection->setCompactUnwindSection(osec); + } else { + getOrCreateOutputSegment(segname)->addOutputSection(osec); + } } for (SyntheticSection *ssec : syntheticSections) { @@ -441,7 +681,7 @@ void Writer::createOutputSections() { if (ssec->isNeeded()) getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec); } else { - error("section from " + it->second->firstSection()->file->getName() + + error("section from " + toString(it->second->firstSection()->file) + " conflicts with 
synthetic section " + ssec->segname + "," + ssec->name); } @@ -454,6 +694,8 @@ void Writer::assignAddresses(OutputSegment *seg) { seg->fileOff = fileOff; for (auto *osec : seg->getSections()) { + if (!osec->isNeeded()) + continue; addr = alignTo(addr, osec->align); fileOff = alignTo(fileOff, osec->align); osec->addr = addr; @@ -484,14 +726,22 @@ void Writer::writeSections() { osec->writeTo(buf + osec->fileOff); } +void Writer::writeUuid() { + uint64_t digest = + xxHash64({buffer->getBufferStart(), buffer->getBufferEnd()}); + uuidCommand->writeUuid(digest); +} + void Writer::run() { // dyld requires __LINKEDIT segment to always exist (even if empty). OutputSegment *linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit); + prepareBranchTarget(config->entry); scanRelocations(); if (in.stubHelper->isNeeded()) in.stubHelper->setup(); + scanSymbols(); // Sort and assign sections to their respective segments. No more sections nor // segments may be created after these methods run. @@ -511,10 +761,13 @@ void Writer::run() { assignAddresses(seg); // Fill __LINKEDIT contents. + in.rebase->finalizeContents(); in.binding->finalizeContents(); - lazyBindingSection->finalizeContents(); - exportSection->finalizeContents(); + in.weakBinding->finalizeContents(); + in.lazyBinding->finalizeContents(); + in.exports->finalizeContents(); symtabSection->finalizeContents(); + indirectSymtabSection->finalizeContents(); // Now that __LINKEDIT is filled out, do a proper calculation of its // addresses and offsets. 
@@ -525,6 +778,7 @@ void Writer::run() { return; writeSections(); + writeUuid(); if (auto e = buffer->commit()) error("failed to write to the output file: " + toString(std::move(e))); @@ -533,10 +787,18 @@ void Writer::run() { void macho::writeResult() { Writer().run(); } void macho::createSyntheticSections() { + in.header = make<MachHeaderSection>(); + in.rebase = make<RebaseSection>(); in.binding = make<BindingSection>(); + in.weakBinding = make<WeakBindingSection>(); + in.lazyBinding = make<LazyBindingSection>(); + in.exports = make<ExportSection>(); in.got = make<GotSection>(); + in.tlvPointers = make<TlvPointerSection>(); in.lazyPointers = make<LazyPointerSection>(); in.stubs = make<StubsSection>(); in.stubHelper = make<StubHelperSection>(); in.imageLoaderCache = make<ImageLoaderCacheSection>(); } + +OutputSection *macho::firstTLVDataSection = nullptr; diff --git a/contrib/llvm-project/lld/MachO/Writer.h b/contrib/llvm-project/lld/MachO/Writer.h index 7f846233107a..88baa8a1e4bb 100644 --- a/contrib/llvm-project/lld/MachO/Writer.h +++ b/contrib/llvm-project/lld/MachO/Writer.h @@ -14,6 +14,8 @@ namespace lld { namespace macho { +class OutputSection; + class LoadCommand { public: virtual ~LoadCommand() = default; @@ -25,6 +27,8 @@ void writeResult(); void createSyntheticSections(); +extern OutputSection *firstTLVDataSection; + } // namespace macho } // namespace lld |