aboutsummaryrefslogtreecommitdiff
path: root/lld/MachO/InputFiles.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lld/MachO/InputFiles.cpp')
-rw-r--r--lld/MachO/InputFiles.cpp774
1 files changed, 653 insertions, 121 deletions
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index bbeb2dc09bf0..c53874133a78 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -45,6 +45,7 @@
#include "Config.h"
#include "Driver.h"
#include "Dwarf.h"
+#include "EhFrame.h"
#include "ExportTrie.h"
#include "InputSection.h"
#include "MachOStructs.h"
@@ -64,6 +65,7 @@
#include "llvm/LTO/LTO.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TarWriter.h"
@@ -95,6 +97,10 @@ std::string lld::toString(const InputFile *f) {
return (f->archiveName + "(" + path::filename(f->getName()) + ")").str();
}
+std::string lld::toString(const Section &sec) {
+ return (toString(sec.file) + ":(" + sec.name + ")").str();
+}
+
SetVector<InputFile *> macho::inputFiles;
std::unique_ptr<TarWriter> macho::tar;
int InputFile::idCount = 0;
@@ -112,6 +118,7 @@ static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) {
const char *hdr = input->mb.getBufferStart();
+ // "Zippered" object files can have multiple LC_BUILD_VERSION load commands.
std::vector<PlatformInfo> platformInfos;
for (auto *cmd : findCommands<build_version_command>(hdr, LC_BUILD_VERSION)) {
PlatformInfo info;
@@ -252,16 +259,38 @@ InputFile::InputFile(Kind kind, const InterfaceFile &interface)
// Note that "record" is a term I came up with. In contrast, "literal" is a term
// used by the Mach-O format.
static Optional<size_t> getRecordSize(StringRef segname, StringRef name) {
- if (name == section_names::cfString) {
- if (config->icfLevel != ICFLevel::none && segname == segment_names::data)
- return target->wordSize == 8 ? 32 : 16;
- } else if (name == section_names::compactUnwind) {
+ if (name == section_names::compactUnwind) {
if (segname == segment_names::ld)
return target->wordSize == 8 ? 32 : 20;
}
+ if (config->icfLevel == ICFLevel::none)
+ return {};
+
+ if (name == section_names::cfString && segname == segment_names::data)
+ return target->wordSize == 8 ? 32 : 16;
+ if (name == section_names::objcClassRefs && segname == segment_names::data)
+ return target->wordSize;
return {};
}
+static Error parseCallGraph(ArrayRef<uint8_t> data,
+ std::vector<CallGraphEntry> &callGraph) {
+ TimeTraceScope timeScope("Parsing call graph section");
+ BinaryStreamReader reader(data, support::little);
+ while (!reader.empty()) {
+ uint32_t fromIndex, toIndex;
+ uint64_t count;
+ if (Error err = reader.readInteger(fromIndex))
+ return err;
+ if (Error err = reader.readInteger(toIndex))
+ return err;
+ if (Error err = reader.readInteger(count))
+ return err;
+ callGraph.emplace_back(fromIndex, toIndex, count);
+ }
+ return Error::success();
+}
+
// Parse the sequence of sections within a single LC_SEGMENT(_64).
// Split each section into subsections.
template <class SectionHeader>
@@ -274,33 +303,29 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
StringRef segname =
StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
- ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
- : buf + sec.offset,
- static_cast<size_t>(sec.size)};
+ sections.push_back(make<Section>(this, segname, name, sec.flags, sec.addr));
if (sec.align >= 32) {
error("alignment " + std::to_string(sec.align) + " of section " + name +
" is too large");
- sections.push_back(sec.addr);
continue;
}
+ Section &section = *sections.back();
uint32_t align = 1 << sec.align;
- uint32_t flags = sec.flags;
+ ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
+ : buf + sec.offset,
+ static_cast<size_t>(sec.size)};
auto splitRecords = [&](int recordSize) -> void {
- sections.push_back(sec.addr);
if (data.empty())
return;
- Subsections &subsections = sections.back().subsections;
+ Subsections &subsections = section.subsections;
subsections.reserve(data.size() / recordSize);
- auto *isec = make<ConcatInputSection>(
- segname, name, this, data.slice(0, recordSize), align, flags);
- subsections.push_back({0, isec});
- for (uint64_t off = recordSize; off < data.size(); off += recordSize) {
- // Copying requires less memory than constructing a fresh InputSection.
- auto *copy = make<ConcatInputSection>(*isec);
- copy->data = data.slice(off, recordSize);
- subsections.push_back({off, copy});
+ for (uint64_t off = 0; off < data.size(); off += recordSize) {
+ auto *isec = make<ConcatInputSection>(
+ section, data.slice(off, recordSize), align);
+ subsections.push_back({off, isec});
}
+ section.doneSplitting = true;
};
if (sectionType(sec.flags) == S_CSTRING_LITERALS ||
@@ -313,66 +338,84 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
InputSection *isec;
if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
- isec =
- make<CStringInputSection>(segname, name, this, data, align, flags);
+ isec = make<CStringInputSection>(section, data, align);
// FIXME: parallelize this?
cast<CStringInputSection>(isec)->splitIntoPieces();
} else {
- isec = make<WordLiteralInputSection>(segname, name, this, data, align,
- flags);
+ isec = make<WordLiteralInputSection>(section, data, align);
}
- sections.push_back(sec.addr);
- sections.back().subsections.push_back({0, isec});
+ section.subsections.push_back({0, isec});
} else if (auto recordSize = getRecordSize(segname, name)) {
splitRecords(*recordSize);
- if (name == section_names::compactUnwind)
- compactUnwindSection = &sections.back();
+ } else if (config->parseEhFrames && name == section_names::ehFrame &&
+ segname == segment_names::text) {
+ splitEhFrames(data, *sections.back());
} else if (segname == segment_names::llvm) {
- if (name == "__cg_profile" && config->callGraphProfileSort) {
- TimeTraceScope timeScope("Parsing call graph section");
- BinaryStreamReader reader(data, support::little);
- while (!reader.empty()) {
- uint32_t fromIndex, toIndex;
- uint64_t count;
- if (Error err = reader.readInteger(fromIndex))
- fatal(toString(this) + ": Expected 32-bit integer");
- if (Error err = reader.readInteger(toIndex))
- fatal(toString(this) + ": Expected 32-bit integer");
- if (Error err = reader.readInteger(count))
- fatal(toString(this) + ": Expected 64-bit integer");
- callGraph.emplace_back();
- CallGraphEntry &entry = callGraph.back();
- entry.fromIndex = fromIndex;
- entry.toIndex = toIndex;
- entry.count = count;
- }
- }
+ if (config->callGraphProfileSort && name == section_names::cgProfile)
+ checkError(parseCallGraph(data, callGraph));
// ld64 does not appear to emit contents from sections within the __LLVM
// segment. Symbols within those sections point to bitcode metadata
// instead of actual symbols. Global symbols within those sections could
- // have the same name without causing duplicate symbol errors. Push an
- // empty entry to ensure indices line up for the remaining sections.
+ // have the same name without causing duplicate symbol errors. To avoid
+ // spurious duplicate symbol errors, we do not parse these sections.
// TODO: Evaluate whether the bitcode metadata is needed.
- sections.push_back(sec.addr);
} else {
- auto *isec =
- make<ConcatInputSection>(segname, name, this, data, align, flags);
+ if (name == section_names::addrSig)
+ addrSigSection = sections.back();
+
+ auto *isec = make<ConcatInputSection>(section, data, align);
if (isDebugSection(isec->getFlags()) &&
isec->getSegName() == segment_names::dwarf) {
// Instead of emitting DWARF sections, we emit STABS symbols to the
// object files that contain them. We filter them out early to avoid
- // parsing their relocations unnecessarily. But we must still push an
- // empty entry to ensure the indices line up for the remaining sections.
- sections.push_back(sec.addr);
+ // parsing their relocations unnecessarily.
debugSections.push_back(isec);
} else {
- sections.push_back(sec.addr);
- sections.back().subsections.push_back({0, isec});
+ section.subsections.push_back({0, isec});
}
}
}
}
+void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
+ EhReader reader(this, data, /*dataOff=*/0, target->wordSize);
+ size_t off = 0;
+ while (off < reader.size()) {
+ uint64_t frameOff = off;
+ uint64_t length = reader.readLength(&off);
+ if (length == 0)
+ break;
+ uint64_t fullLength = length + (off - frameOff);
+ off += length;
+ // We hard-code an alignment of 1 here because we don't actually want our
+ // EH frames to be aligned to the section alignment. EH frame decoders don't
+ // expect this alignment. Moreover, each EH frame must start where the
+ // previous one ends, and where it ends is indicated by the length field.
+ // Unless we update the length field (troublesome), we should keep the
+ // alignment to 1.
+ // Note that we still want to preserve the alignment of the overall section,
+ // just not of the individual EH frames.
+ ehFrameSection.subsections.push_back(
+ {frameOff, make<ConcatInputSection>(ehFrameSection,
+ data.slice(frameOff, fullLength),
+ /*align=*/1)});
+ }
+ ehFrameSection.doneSplitting = true;
+}
+
+template <class T>
+static Section *findContainingSection(const std::vector<Section *> &sections,
+ T *offset) {
+ static_assert(std::is_same<uint64_t, T>::value ||
+ std::is_same<uint32_t, T>::value,
+ "unexpected type for offset");
+ auto it = std::prev(llvm::upper_bound(
+ sections, *offset,
+ [](uint64_t value, const Section *sec) { return value < sec->addr; }));
+ *offset -= (*it)->addr;
+ return *it;
+}
+
// Find the subsection corresponding to the greatest section offset that is <=
// that of the given offset.
//
@@ -381,18 +424,180 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
// same location as an offset relative to the start of the containing
// subsection.
template <class T>
-static InputSection *findContainingSubsection(const Subsections &subsections,
+static InputSection *findContainingSubsection(const Section &section,
T *offset) {
static_assert(std::is_same<uint64_t, T>::value ||
std::is_same<uint32_t, T>::value,
"unexpected type for offset");
auto it = std::prev(llvm::upper_bound(
- subsections, *offset,
+ section.subsections, *offset,
[](uint64_t value, Subsection subsec) { return value < subsec.offset; }));
*offset -= it->offset;
return it->isec;
}
+// Find a symbol at offset `off` within `isec`.
+static Defined *findSymbolAtOffset(const ConcatInputSection *isec,
+ uint64_t off) {
+ auto it = llvm::lower_bound(isec->symbols, off, [](Defined *d, uint64_t off) {
+ return d->value < off;
+ });
+ // The offset should point at the exact address of a symbol (with no addend.)
+ if (it == isec->symbols.end() || (*it)->value != off) {
+ assert(isec->wasCoalesced);
+ return nullptr;
+ }
+ return *it;
+}
+
+// Linker optimization hints mark a sequence of instructions used for
+// synthesizing an address which that be transformed into a faster sequence. The
+// transformations depend on conditions that are determined at link time, like
+// the distance to the referenced symbol or its alignment.
+//
+// Each hint has a type and refers to 2 or 3 instructions. Each of those
+// instructions must have a corresponding relocation. After addresses have been
+// finalized and relocations have been performed, we check if the requirements
+// hold, and perform the optimizations if they do.
+//
+// Similar linker relaxations exist for ELF as well, with the difference being
+// that the explicit marking allows for the relaxation of non-consecutive
+// relocations too.
+//
+// The specific types of hints are documented in Arch/ARM64.cpp
+void ObjFile::parseOptimizationHints(ArrayRef<uint8_t> data) {
+ auto expectedArgCount = [](uint8_t type) {
+ switch (type) {
+ case LOH_ARM64_ADRP_ADRP:
+ case LOH_ARM64_ADRP_LDR:
+ case LOH_ARM64_ADRP_ADD:
+ case LOH_ARM64_ADRP_LDR_GOT:
+ return 2;
+ case LOH_ARM64_ADRP_ADD_LDR:
+ case LOH_ARM64_ADRP_ADD_STR:
+ case LOH_ARM64_ADRP_LDR_GOT_LDR:
+ case LOH_ARM64_ADRP_LDR_GOT_STR:
+ return 3;
+ }
+ return -1;
+ };
+
+ // Each hint contains at least 4 ULEB128-encoded fields, so in the worst case,
+ // there are data.size() / 4 LOHs. It's a huge overestimation though, as
+ // offsets are unlikely to fall in the 0-127 byte range, so we pre-allocate
+ // half as much.
+ optimizationHints.reserve(data.size() / 8);
+
+ for (const uint8_t *p = data.begin(); p < data.end();) {
+ const ptrdiff_t inputOffset = p - data.begin();
+ unsigned int n = 0;
+ uint8_t type = decodeULEB128(p, &n, data.end());
+ p += n;
+
+ // An entry of type 0 terminates the list.
+ if (type == 0)
+ break;
+
+ int expectedCount = expectedArgCount(type);
+ if (LLVM_UNLIKELY(expectedCount == -1)) {
+ error("Linker optimization hint at offset " + Twine(inputOffset) +
+ " has unknown type " + Twine(type));
+ return;
+ }
+
+ uint8_t argCount = decodeULEB128(p, &n, data.end());
+ p += n;
+
+ if (LLVM_UNLIKELY(argCount != expectedCount)) {
+ error("Linker optimization hint at offset " + Twine(inputOffset) +
+ " has " + Twine(argCount) + " arguments instead of the expected " +
+ Twine(expectedCount));
+ return;
+ }
+
+ uint64_t offset0 = decodeULEB128(p, &n, data.end());
+ p += n;
+
+ int16_t delta[2];
+ for (int i = 0; i < argCount - 1; ++i) {
+ uint64_t address = decodeULEB128(p, &n, data.end());
+ p += n;
+ int64_t d = address - offset0;
+ if (LLVM_UNLIKELY(d > std::numeric_limits<int16_t>::max() ||
+ d < std::numeric_limits<int16_t>::min())) {
+ error("Linker optimization hint at offset " + Twine(inputOffset) +
+ " has addresses too far apart");
+ return;
+ }
+ delta[i] = d;
+ }
+
+ optimizationHints.push_back({offset0, {delta[0], delta[1]}, type});
+ }
+
+ // We sort the per-object vector of optimization hints so each section only
+ // needs to hold an ArrayRef to a contiguous range of hints.
+ llvm::sort(optimizationHints,
+ [](const OptimizationHint &a, const OptimizationHint &b) {
+ return a.offset0 < b.offset0;
+ });
+
+ auto section = sections.begin();
+ auto subsection = (*section)->subsections.begin();
+ uint64_t subsectionBase = 0;
+ uint64_t subsectionEnd = 0;
+
+ auto updateAddr = [&]() {
+ subsectionBase = (*section)->addr + subsection->offset;
+ subsectionEnd = subsectionBase + subsection->isec->getSize();
+ };
+
+ auto advanceSubsection = [&]() {
+ if (section == sections.end())
+ return;
+ ++subsection;
+ if (subsection == (*section)->subsections.end()) {
+ ++section;
+ if (section == sections.end())
+ return;
+ subsection = (*section)->subsections.begin();
+ }
+ };
+
+ updateAddr();
+ auto hintStart = optimizationHints.begin();
+ for (auto hintEnd = hintStart, end = optimizationHints.end(); hintEnd != end;
+ ++hintEnd) {
+ if (hintEnd->offset0 >= subsectionEnd) {
+ subsection->isec->optimizationHints =
+ ArrayRef<OptimizationHint>(&*hintStart, hintEnd - hintStart);
+
+ hintStart = hintEnd;
+ while (hintStart->offset0 >= subsectionEnd) {
+ advanceSubsection();
+ if (section == sections.end())
+ break;
+ updateAddr();
+ }
+ }
+
+ hintEnd->offset0 -= subsectionBase;
+ for (int i = 0, count = expectedArgCount(hintEnd->type); i < count - 1;
+ ++i) {
+ if (LLVM_UNLIKELY(
+ hintEnd->delta[i] < -static_cast<int64_t>(hintEnd->offset0) ||
+ hintEnd->delta[i] >=
+ static_cast<int64_t>(subsectionEnd - hintEnd->offset0))) {
+ error("Linker optimization hint spans multiple sections");
+ return;
+ }
+ }
+ }
+ if (section != sections.end())
+ subsection->isec->optimizationHints = ArrayRef<OptimizationHint>(
+ &*hintStart, optimizationHints.end() - hintStart);
+}
+
template <class SectionHeader>
static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
relocation_info rel) {
@@ -427,12 +632,12 @@ static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
template <class SectionHeader>
void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
- const SectionHeader &sec,
- Subsections &subsections) {
+ const SectionHeader &sec, Section &section) {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
ArrayRef<relocation_info> relInfos(
reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
+ Subsections &subsections = section.subsections;
auto subsecIt = subsections.rbegin();
for (size_t i = 0; i < relInfos.size(); i++) {
// Paired relocations serve as Mach-O's method for attaching a
@@ -463,13 +668,6 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
relocation_info relInfo = relInfos[i];
bool isSubtrahend =
target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
- if (isSubtrahend && StringRef(sec.sectname) == section_names::ehFrame) {
- // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
- // adds local "EH_Frame1" and "func.eh". Ignore them because they have
- // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
- ++i;
- continue;
- }
int64_t pairedAddend = 0;
if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
@@ -511,10 +709,8 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
// The addend for a non-pcrel relocation is its absolute address.
referentOffset = totalAddend - referentSecHead.addr;
}
- Subsections &referentSubsections =
- sections[relInfo.r_symbolnum - 1].subsections;
- r.referent =
- findContainingSubsection(referentSubsections, &referentOffset);
+ r.referent = findContainingSubsection(*sections[relInfo.r_symbolnum - 1],
+ &referentOffset);
r.addend = referentOffset;
}
@@ -528,7 +724,7 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
++subsecIt;
if (subsecIt == subsections.rend() ||
subsecIt->offset + subsecIt->isec->getSize() <= r.offset) {
- subsec = findContainingSubsection(subsections, &r.offset);
+ subsec = findContainingSubsection(section, &r.offset);
// Now that we know the relocs are unsorted, avoid trying the 'fast path'
// for the other relocations.
subsecIt = subsections.rend();
@@ -552,10 +748,8 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
} else {
uint64_t referentOffset =
totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr;
- Subsections &referentSubsectVec =
- sections[minuendInfo.r_symbolnum - 1].subsections;
- p.referent =
- findContainingSubsection(referentSubsectVec, &referentOffset);
+ p.referent = findContainingSubsection(
+ *sections[minuendInfo.r_symbolnum - 1], &referentOffset);
p.addend = referentOffset;
}
subsec->relocs.push_back(p);
@@ -629,9 +823,11 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
}
assert(!isWeakDefCanBeHidden &&
"weak_def_can_be_hidden on already-hidden symbol?");
+ bool includeInSymtab =
+ !name.startswith("l") && !name.startswith("L") && !isEhFrameSection(isec);
return make<Defined>(
name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
- /*isExternal=*/false, /*isPrivateExtern=*/false,
+ /*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY,
sym.n_desc & N_NO_DEAD_STRIP);
}
@@ -651,7 +847,7 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
/*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false,
- sym.n_desc & N_ARM_THUMB_DEF,
+ /*includeInSymtab=*/true, sym.n_desc & N_ARM_THUMB_DEF,
/*isReferencedDynamically=*/false,
sym.n_desc & N_NO_DEAD_STRIP);
}
@@ -705,7 +901,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
StringRef name = strtab + sym.n_strx;
if ((sym.n_type & N_TYPE) == N_SECT) {
- Subsections &subsections = sections[sym.n_sect - 1].subsections;
+ Subsections &subsections = sections[sym.n_sect - 1]->subsections;
// parseSections() may have chosen not to parse this section.
if (subsections.empty())
continue;
@@ -718,33 +914,26 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
}
for (size_t i = 0; i < sections.size(); ++i) {
- Subsections &subsections = sections[i].subsections;
+ Subsections &subsections = sections[i]->subsections;
if (subsections.empty())
continue;
- InputSection *lastIsec = subsections.back().isec;
- if (lastIsec->getName() == section_names::ehFrame) {
- // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
- // adds local "EH_Frame1" and "func.eh". Ignore them because they have
- // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
- continue;
- }
std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
uint64_t sectionAddr = sectionHeaders[i].addr;
uint32_t sectionAlign = 1u << sectionHeaders[i].align;
- // Record-based sections have already been split into subsections during
+ // Some sections have already been split into subsections during
// parseSections(), so we simply need to match Symbols to the corresponding
// subsection here.
- if (getRecordSize(lastIsec->getSegName(), lastIsec->getName())) {
+ if (sections[i]->doneSplitting) {
for (size_t j = 0; j < symbolIndices.size(); ++j) {
uint32_t symIndex = symbolIndices[j];
const NList &sym = nList[symIndex];
StringRef name = strtab + sym.n_strx;
uint64_t symbolOffset = sym.n_value - sectionAddr;
InputSection *isec =
- findContainingSubsection(subsections, &symbolOffset);
+ findContainingSubsection(*sections[i], &symbolOffset);
if (symbolOffset != 0) {
- error(toString(lastIsec) + ": symbol " + name +
+ error(toString(*sections[i]) + ": symbol " + name +
" at misaligned offset");
continue;
}
@@ -752,6 +941,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
}
continue;
}
+ sections[i]->doneSplitting = true;
// Calculate symbol sizes and create subsections by splitting the sections
// along symbol boundaries.
@@ -829,12 +1019,13 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
: InputFile(OpaqueKind, mb) {
const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
ArrayRef<uint8_t> data = {buf, mb.getBufferSize()};
- ConcatInputSection *isec =
- make<ConcatInputSection>(segName.take_front(16), sectName.take_front(16),
- /*file=*/this, data);
+ sections.push_back(make<Section>(/*file=*/this, segName.take_front(16),
+ sectName.take_front(16),
+ /*flags=*/0, /*addr=*/0));
+ Section &section = *sections.back();
+ ConcatInputSection *isec = make<ConcatInputSection>(section, data);
isec->live = true;
- sections.push_back(0);
- sections.back().subsections.push_back({0, isec});
+ section.subsections.push_back({0, isec});
}
ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
@@ -904,13 +1095,30 @@ template <class LP> void ObjFile::parse() {
// The relocations may refer to the symbols, so we parse them after we have
// parsed all the symbols.
for (size_t i = 0, n = sections.size(); i < n; ++i)
- if (!sections[i].subsections.empty())
- parseRelocations(sectionHeaders, sectionHeaders[i],
- sections[i].subsections);
+ if (!sections[i]->subsections.empty())
+ parseRelocations(sectionHeaders, sectionHeaders[i], *sections[i]);
+
+ if (!config->ignoreOptimizationHints)
+ if (auto *cmd = findCommand<linkedit_data_command>(
+ hdr, LC_LINKER_OPTIMIZATION_HINT))
+ parseOptimizationHints({buf + cmd->dataoff, cmd->datasize});
parseDebugInfo();
+
+ Section *ehFrameSection = nullptr;
+ Section *compactUnwindSection = nullptr;
+ for (Section *sec : sections) {
+ Section **s = StringSwitch<Section **>(sec->name)
+ .Case(section_names::compactUnwind, &compactUnwindSection)
+ .Case(section_names::ehFrame, &ehFrameSection)
+ .Default(nullptr);
+ if (s)
+ *s = sec;
+ }
if (compactUnwindSection)
- registerCompactUnwind();
+ registerCompactUnwind(*compactUnwindSection);
+ if (config->parseEhFrames && ehFrameSection)
+ registerEhFrames(*ehFrameSection);
}
template <class LP> void ObjFile::parseLazy() {
@@ -944,6 +1152,8 @@ void ObjFile::parseDebugInfo() {
if (!dObj)
return;
+ // We do not re-use the context from getDwarf() here as that function
+ // constructs an expensive DWARFCache object.
auto *ctx = make<DWARFContext>(
std::move(dObj), "",
[&](Error err) {
@@ -959,7 +1169,7 @@ void ObjFile::parseDebugInfo() {
// FIXME: There can be more than one compile unit per object file. See
// PR48637.
auto it = units.begin();
- compileUnit = it->get();
+ compileUnit = it != units.end() ? it->get() : nullptr;
}
ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
@@ -973,8 +1183,8 @@ ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
}
// Create pointers from symbols to their associated compact unwind entries.
-void ObjFile::registerCompactUnwind() {
- for (const Subsection &subsection : compactUnwindSection->subsections) {
+void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
+ for (const Subsection &subsection : compactUnwindSection.subsections) {
ConcatInputSection *isec = cast<ConcatInputSection>(subsection.isec);
// Hack!! Since each CUE contains a different function address, if ICF
// operated naively and compared the entire contents of each CUE, entries
@@ -984,6 +1194,12 @@ void ObjFile::registerCompactUnwind() {
// of the corresponding relocations.) We rely on `relocateCompactUnwind()`
// to correctly handle these truncated input sections.
isec->data = isec->data.slice(target->wordSize);
+ uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t));
+ // llvm-mc omits CU entries for functions that need DWARF encoding, but
+ // `ld -r` doesn't. We can ignore them because we will re-synthesize these
+ // CU entries from the DWARF info during the output phase.
+ if ((encoding & target->modeDwarfEncoding) == target->modeDwarfEncoding)
+ continue;
ConcatInputSection *referentIsec;
for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
@@ -1007,23 +1223,23 @@ void ObjFile::registerCompactUnwind() {
referentIsec =
cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
}
+ // Unwind info lives in __DATA, and finalization of __TEXT will occur
+ // before finalization of __DATA. Moreover, the finalization of unwind
+ // info depends on the exact addresses that it references. So it is safe
+ // for compact unwind to reference addresses in __TEXT, but not addresses
+ // in any other segment.
if (referentIsec->getSegName() != segment_names::text)
- error("compact unwind references address in " + toString(referentIsec) +
- " which is not in segment __TEXT");
+ error(isec->getLocation(r.offset) + " references section " +
+ referentIsec->getName() + " which is not in segment __TEXT");
// The functionAddress relocations are typically section relocations.
// However, unwind info operates on a per-symbol basis, so we search for
// the function symbol here.
- auto symIt = llvm::lower_bound(
- referentIsec->symbols, add,
- [](Defined *d, uint64_t add) { return d->value < add; });
- // The relocation should point at the exact address of a symbol (with no
- // addend).
- if (symIt == referentIsec->symbols.end() || (*symIt)->value != add) {
- assert(referentIsec->wasCoalesced);
+ Defined *d = findSymbolAtOffset(referentIsec, add);
+ if (!d) {
++it;
continue;
}
- (*symIt)->unwindEntry = isec;
+ d->unwindEntry = isec;
// Since we've sliced away the functionAddress, we should remove the
// corresponding relocation too. Given that clang emits relocations in
// reverse order of address, this relocation should be at the end of the
@@ -1034,6 +1250,310 @@ void ObjFile::registerCompactUnwind() {
}
}
+struct CIE {
+ macho::Symbol *personalitySymbol = nullptr;
+ bool fdesHaveLsda = false;
+ bool fdesHaveAug = false;
+};
+
+static CIE parseCIE(const InputSection *isec, const EhReader &reader,
+ size_t off) {
+ // Handling the full generality of possible DWARF encodings would be a major
+ // pain. We instead take advantage of our knowledge of how llvm-mc encodes
+ // DWARF and handle just that.
+ constexpr uint8_t expectedPersonalityEnc =
+ dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
+ constexpr uint8_t expectedPointerEnc =
+ dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_absptr;
+
+ CIE cie;
+ uint8_t version = reader.readByte(&off);
+ if (version != 1 && version != 3)
+ fatal("Expected CIE version of 1 or 3, got " + Twine(version));
+ StringRef aug = reader.readString(&off);
+ reader.skipLeb128(&off); // skip code alignment
+ reader.skipLeb128(&off); // skip data alignment
+ reader.skipLeb128(&off); // skip return address register
+ reader.skipLeb128(&off); // skip aug data length
+ uint64_t personalityAddrOff = 0;
+ for (char c : aug) {
+ switch (c) {
+ case 'z':
+ cie.fdesHaveAug = true;
+ break;
+ case 'P': {
+ uint8_t personalityEnc = reader.readByte(&off);
+ if (personalityEnc != expectedPersonalityEnc)
+ reader.failOn(off, "unexpected personality encoding 0x" +
+ Twine::utohexstr(personalityEnc));
+ personalityAddrOff = off;
+ off += 4;
+ break;
+ }
+ case 'L': {
+ cie.fdesHaveLsda = true;
+ uint8_t lsdaEnc = reader.readByte(&off);
+ if (lsdaEnc != expectedPointerEnc)
+ reader.failOn(off, "unexpected LSDA encoding 0x" +
+ Twine::utohexstr(lsdaEnc));
+ break;
+ }
+ case 'R': {
+ uint8_t pointerEnc = reader.readByte(&off);
+ if (pointerEnc != expectedPointerEnc)
+ reader.failOn(off, "unexpected pointer encoding 0x" +
+ Twine::utohexstr(pointerEnc));
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ if (personalityAddrOff != 0) {
+ auto personalityRelocIt =
+ llvm::find_if(isec->relocs, [=](const macho::Reloc &r) {
+ return r.offset == personalityAddrOff;
+ });
+ if (personalityRelocIt == isec->relocs.end())
+ reader.failOn(off, "Failed to locate relocation for personality symbol");
+ cie.personalitySymbol = personalityRelocIt->referent.get<macho::Symbol *>();
+ }
+ return cie;
+}
+
+// EH frame target addresses may be encoded as pcrel offsets. However, instead
+// of using an actual pcrel reloc, ld64 emits subtractor relocations instead.
+// This function recovers the target address from the subtractors, essentially
+// performing the inverse operation of EhRelocator.
+//
+// Concretely, we expect our relocations to write the value of `PC -
+// target_addr` to `PC`. `PC` itself is denoted by a minuend relocation that
+// points to a symbol plus an addend.
+//
+// It is important that the minuend relocation point to a symbol within the
+// same section as the fixup value, since sections may get moved around.
+//
+// For example, for arm64, llvm-mc emits relocations for the target function
+// address like so:
+//
+// ltmp:
+// <CIE start>
+// ...
+// <CIE end>
+// ... multiple FDEs ...
+// <FDE start>
+// <target function address - (ltmp + pcrel offset)>
+// ...
+//
+// If any of the FDEs in `multiple FDEs` get dead-stripped, then `FDE start`
+// will move to an earlier address, and `ltmp + pcrel offset` will no longer
+// reflect an accurate pcrel value. To avoid this problem, we "canonicalize"
+// our relocation by adding an `EH_Frame` symbol at `FDE start`, and updating
+// the reloc to be `target function address - (EH_Frame + new pcrel offset)`.
+//
+// If `Invert` is set, then we instead expect `target_addr - PC` to be written
+// to `PC`.
+template <bool Invert = false>
+Defined *
+targetSymFromCanonicalSubtractor(const InputSection *isec,
+ std::vector<macho::Reloc>::iterator relocIt) {
+ macho::Reloc &subtrahend = *relocIt;
+ macho::Reloc &minuend = *std::next(relocIt);
+ assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND));
+ assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED));
+ // Note: pcSym may *not* be exactly at the PC; there's usually a non-zero
+ // addend.
+ auto *pcSym = cast<Defined>(subtrahend.referent.get<macho::Symbol *>());
+ Defined *target =
+ cast_or_null<Defined>(minuend.referent.dyn_cast<macho::Symbol *>());
+ if (!pcSym) {
+ auto *targetIsec =
+ cast<ConcatInputSection>(minuend.referent.get<InputSection *>());
+ target = findSymbolAtOffset(targetIsec, minuend.addend);
+ }
+ if (Invert)
+ std::swap(pcSym, target);
+ if (pcSym->isec == isec) {
+ if (pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
+ fatal("invalid FDE relocation in __eh_frame");
+ } else {
+ // Ensure the pcReloc points to a symbol within the current EH frame.
+ // HACK: we should really verify that the original relocation's semantics
+ // are preserved. In particular, we should have
+ // `oldSym->value + oldOffset == newSym + newOffset`. However, we don't
+ // have an easy way to access the offsets from this point in the code; some
+ // refactoring is needed for that.
+ macho::Reloc &pcReloc = Invert ? minuend : subtrahend;
+ pcReloc.referent = isec->symbols[0];
+ assert(isec->symbols[0]->value == 0);
+ minuend.addend = pcReloc.offset * (Invert ? 1LL : -1LL);
+ }
+ return target;
+}
+
+Defined *findSymbolAtAddress(const std::vector<Section *> &sections,
+ uint64_t addr) {
+ Section *sec = findContainingSection(sections, &addr);
+ auto *isec = cast<ConcatInputSection>(findContainingSubsection(*sec, &addr));
+ return findSymbolAtOffset(isec, addr);
+}
+
+// For symbols that don't have compact unwind info, associate them with the more
+// general-purpose (and verbose) DWARF unwind info found in __eh_frame.
+//
+// This requires us to parse the contents of __eh_frame. See EhFrame.h for a
+// description of its format.
+//
+// While parsing, we also look for what MC calls "abs-ified" relocations -- they
+// are relocations which are implicitly encoded as offsets in the section data.
+// We convert them into explicit Reloc structs so that the EH frames can be
+// handled just like a regular ConcatInputSection later in our output phase.
+//
+// We also need to handle the case where our input object file has explicit
+// relocations. This is the case when e.g. it's the output of `ld -r`. We only
+// look for the "abs-ified" relocation if an explicit relocation is absent.
+void ObjFile::registerEhFrames(Section &ehFrameSection) {
+ DenseMap<const InputSection *, CIE> cieMap;
+ for (const Subsection &subsec : ehFrameSection.subsections) {
+ auto *isec = cast<ConcatInputSection>(subsec.isec);
+ uint64_t isecOff = subsec.offset;
+
+ // Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
+ // that all EH frames have an associated symbol so that we can generate
+ // subtractor relocs that reference them.
+ if (isec->symbols.size() == 0)
+ isec->symbols.push_back(make<Defined>(
+ "EH_Frame", isec->getFile(), isec, /*value=*/0, /*size=*/0,
+ /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
+ /*includeInSymtab=*/false, /*isThumb=*/false,
+ /*isReferencedDynamically=*/false, /*noDeadStrip=*/false));
+ else if (isec->symbols[0]->value != 0)
+ fatal("found symbol at unexpected offset in __eh_frame");
+
+ EhReader reader(this, isec->data, subsec.offset, target->wordSize);
+ size_t dataOff = 0; // Offset from the start of the EH frame.
+ reader.skipValidLength(&dataOff); // readLength() already validated this.
+ // cieOffOff is the offset from the start of the EH frame to the cieOff
+ // value, which is itself an offset from the current PC to a CIE.
+ const size_t cieOffOff = dataOff;
+
+ EhRelocator ehRelocator(isec);
+ auto cieOffRelocIt = llvm::find_if(
+ isec->relocs, [=](const Reloc &r) { return r.offset == cieOffOff; });
+ InputSection *cieIsec = nullptr;
+ if (cieOffRelocIt != isec->relocs.end()) {
+ // We already have an explicit relocation for the CIE offset.
+ cieIsec =
+ targetSymFromCanonicalSubtractor</*Invert=*/true>(isec, cieOffRelocIt)
+ ->isec;
+ dataOff += sizeof(uint32_t);
+ } else {
+ // If we haven't found a relocation, then the CIE offset is most likely
+ // embedded in the section data (AKA an "abs-ified" reloc.). Parse that
+ // and generate a Reloc struct.
+ uint32_t cieMinuend = reader.readU32(&dataOff);
+ if (cieMinuend == 0)
+ cieIsec = isec;
+ else {
+ uint32_t cieOff = isecOff + dataOff - cieMinuend;
+ cieIsec = findContainingSubsection(ehFrameSection, &cieOff);
+ if (cieIsec == nullptr)
+ fatal("failed to find CIE");
+ }
+ if (cieIsec != isec)
+ ehRelocator.makeNegativePcRel(cieOffOff, cieIsec->symbols[0],
+ /*length=*/2);
+ }
+ if (cieIsec == isec) {
+ cieMap[cieIsec] = parseCIE(isec, reader, dataOff);
+ continue;
+ }
+
+ // Offset of the function address within the EH frame.
+ const size_t funcAddrOff = dataOff;
+ uint64_t funcAddr = reader.readPointer(&dataOff) + ehFrameSection.addr +
+ isecOff + funcAddrOff;
+ uint32_t funcLength = reader.readPointer(&dataOff);
+ size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
+ assert(cieMap.count(cieIsec));
+ const CIE &cie = cieMap[cieIsec];
+ Optional<uint64_t> lsdaAddrOpt;
+ if (cie.fdesHaveAug) {
+ reader.skipLeb128(&dataOff);
+ lsdaAddrOff = dataOff;
+ if (cie.fdesHaveLsda) {
+ uint64_t lsdaOff = reader.readPointer(&dataOff);
+ if (lsdaOff != 0) // FIXME possible to test this?
+ lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
+ }
+ }
+
+ auto funcAddrRelocIt = isec->relocs.end();
+ auto lsdaAddrRelocIt = isec->relocs.end();
+ for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
+ if (it->offset == funcAddrOff)
+ funcAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
+ else if (lsdaAddrOpt && it->offset == lsdaAddrOff)
+ lsdaAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
+ }
+
+ Defined *funcSym;
+ if (funcAddrRelocIt != isec->relocs.end()) {
+ funcSym = targetSymFromCanonicalSubtractor(isec, funcAddrRelocIt);
+ } else {
+ funcSym = findSymbolAtAddress(sections, funcAddr);
+ ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
+ }
+ // The symbol has been coalesced, or already has a compact unwind entry.
+ if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
+ // We must prune unused FDEs for correctness, so we cannot rely on
+ // -dead_strip being enabled.
+ isec->live = false;
+ continue;
+ }
+
+ InputSection *lsdaIsec = nullptr;
+ if (lsdaAddrRelocIt != isec->relocs.end()) {
+ lsdaIsec = targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec;
+ } else if (lsdaAddrOpt) {
+ uint64_t lsdaAddr = *lsdaAddrOpt;
+ Section *sec = findContainingSection(sections, &lsdaAddr);
+ lsdaIsec =
+ cast<ConcatInputSection>(findContainingSubsection(*sec, &lsdaAddr));
+ ehRelocator.makePcRel(lsdaAddrOff, lsdaIsec, target->p2WordSize);
+ }
+
+ fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
+ funcSym->unwindEntry = isec;
+ ehRelocator.commit();
+ }
+}
+
+std::string ObjFile::sourceFile() const {
+ SmallString<261> dir(compileUnit->getCompilationDir());
+ StringRef sep = sys::path::get_separator();
+ // We don't use `path::append` here because we want an empty `dir` to result
+ // in an absolute path. `append` would give us a relative path for that case.
+ if (!dir.endswith(sep))
+ dir += sep;
+ return (dir + compileUnit->getUnitDIE().getShortName()).str();
+}
+
+lld::DWARFCache *ObjFile::getDwarf() {
+ llvm::call_once(initDwarf, [this]() {
+ auto dwObj = DwarfObject::create(this);
+ if (!dwObj)
+ return;
+ dwarfCache = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
+ std::move(dwObj), "",
+ [&](Error err) { warn(getName() + ": " + toString(std::move(err))); },
+ [&](Error warning) {
+ warn(getName() + ": " + toString(std::move(warning)));
+ }));
+ });
+
+ return dwarfCache.get();
+}
// The path can point to either a dylib or a .tbd file.
static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
Optional<MemoryBufferRef> mbref = readFile(path);
@@ -1117,7 +1637,8 @@ static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
make_pointee_range(currentTopLevelTapi->documents())) {
assert(child.documents().empty());
if (path == child.getInstallName()) {
- auto file = make<DylibFile>(child, umbrella);
+ auto file = make<DylibFile>(child, umbrella, /*isBundleLoader=*/false,
+ /*explicitlyLinked=*/false);
file->parseReexports(child);
return file;
}
@@ -1158,9 +1679,9 @@ static void loadReexport(StringRef path, DylibFile *umbrella,
}
DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
- bool isBundleLoader)
+ bool isBundleLoader, bool explicitlyLinked)
: InputFile(DylibKind, mb), refState(RefState::Unreferenced),
- isBundleLoader(isBundleLoader) {
+ explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
assert(!isBundleLoader || !umbrella);
if (umbrella == nullptr)
umbrella = this;
@@ -1269,17 +1790,26 @@ void DylibFile::parseLoadCommands(MemoryBufferRef mb) {
}
}
-// Some versions of XCode ship with .tbd files that don't have the right
+// Some versions of Xcode ship with .tbd files that don't have the right
// platform settings.
-static constexpr std::array<StringRef, 3> skipPlatformChecks{
+constexpr std::array<StringRef, 3> skipPlatformChecks{
"/usr/lib/system/libsystem_kernel.dylib",
"/usr/lib/system/libsystem_platform.dylib",
"/usr/lib/system/libsystem_pthread.dylib"};
+static bool skipPlatformCheckForCatalyst(const InterfaceFile &interface,
+ bool explicitlyLinked) {
+ // Catalyst outputs can link against implicitly linked macOS-only libraries.
+ if (config->platform() != PLATFORM_MACCATALYST || explicitlyLinked)
+ return false;
+ return is_contained(interface.targets(),
+ MachO::Target(config->arch(), PLATFORM_MACOS));
+}
+
DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
- bool isBundleLoader)
+ bool isBundleLoader, bool explicitlyLinked)
: InputFile(DylibKind, interface), refState(RefState::Unreferenced),
- isBundleLoader(isBundleLoader) {
+ explicitlyLinked(explicitlyLinked), isBundleLoader(isBundleLoader) {
// FIXME: Add test for the missing TBD code path.
if (umbrella == nullptr)
@@ -1295,7 +1825,8 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
inputFiles.insert(this);
if (!is_contained(skipPlatformChecks, installName) &&
- !is_contained(interface.targets(), config->platformInfo.target)) {
+ !is_contained(interface.targets(), config->platformInfo.target) &&
+ !skipPlatformCheckForCatalyst(interface, explicitlyLinked)) {
error(toString(this) + " is incompatible with " +
std::string(config->platformInfo.target));
return;
@@ -1569,6 +2100,7 @@ static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
case GlobalValue::DefaultVisibility:
break;
}
+ isPrivateExtern = isPrivateExtern || objSym.canBeOmittedFromSymbolTable();
if (objSym.isCommon())
return symtab->addCommon(name, &file, objSym.getCommonSize(),