diff options
Diffstat (limited to 'lld/MachO/ICF.cpp')
| -rw-r--r-- | lld/MachO/ICF.cpp | 202 |
1 files changed, 143 insertions, 59 deletions
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index f9dea4b861ac..ad029142681f 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -8,12 +8,17 @@ #include "ICF.h" #include "ConcatOutputSection.h" +#include "Config.h" #include "InputSection.h" +#include "SymbolTable.h" #include "Symbols.h" #include "UnwindInfoSection.h" +#include "lld/Common/CommonLinkerContext.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/xxhash.h" #include <atomic> @@ -21,23 +26,34 @@ using namespace llvm; using namespace lld; using namespace lld::macho; +static constexpr bool verboseDiagnostics = false; + class ICF { public: ICF(std::vector<ConcatInputSection *> &inputs); - void run(); - void segregate(size_t begin, size_t end, - std::function<bool(const ConcatInputSection *, - const ConcatInputSection *)> - equals); + + using EqualsFn = bool (ICF::*)(const ConcatInputSection *, + const ConcatInputSection *); + void segregate(size_t begin, size_t end, EqualsFn); size_t findBoundary(size_t begin, size_t end); void forEachClassRange(size_t begin, size_t end, - std::function<void(size_t, size_t)> func); - void forEachClass(std::function<void(size_t, size_t)> func); + llvm::function_ref<void(size_t, size_t)> func); + void forEachClass(llvm::function_ref<void(size_t, size_t)> func); + + bool equalsConstant(const ConcatInputSection *ia, + const ConcatInputSection *ib); + bool equalsVariable(const ConcatInputSection *ia, + const ConcatInputSection *ib); // ICF needs a copy of the inputs vector because its equivalence-class // segregation algorithm destroys the proper sequence. std::vector<ConcatInputSection *> icfInputs; + + unsigned icfPass = 0; + std::atomic<bool> icfRepeat{false}; + std::atomic<uint64_t> equalsConstantCount{0}; + std::atomic<uint64_t> equalsVariableCount{0}; }; ICF::ICF(std::vector<ConcatInputSection *> &inputs) { @@ -74,13 +90,12 @@ ICF::ICF(std::vector<ConcatInputSection *> &inputs) { // FIXME(gkm): implement keep-unique attributes // FIXME(gkm): implement address-significance tables for MachO object files -static unsigned icfPass = 0; -static std::atomic<bool> icfRepeat{false}; - // Compare "non-moving" parts of two ConcatInputSections, namely everything // except references to other ConcatInputSections. -static bool equalsConstant(const ConcatInputSection *ia, - const ConcatInputSection *ib) { +bool ICF::equalsConstant(const ConcatInputSection *ia, + const ConcatInputSection *ib) { + if (verboseDiagnostics) + ++equalsConstantCount; // We can only fold within the same OutputSection. if (ia->parent != ib->parent) return false; @@ -99,8 +114,6 @@ static bool equalsConstant(const ConcatInputSection *ia, return false; if (ra.offset != rb.offset) return false; - if (ra.addend != rb.addend) - return false; if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>()) return false; @@ -113,16 +126,16 @@ static bool equalsConstant(const ConcatInputSection *ia, const auto *sb = rb.referent.get<Symbol *>(); if (sa->kind() != sb->kind()) return false; - if (!isa<Defined>(sa)) { - // ICF runs before Undefineds are reported. - assert(isa<DylibSymbol>(sa) || isa<Undefined>(sa)); - return sa == sb; - } + // ICF runs before Undefineds are treated (and potentially converted into + // DylibSymbols). + if (isa<DylibSymbol>(sa) || isa<Undefined>(sa)) + return sa == sb && ra.addend == rb.addend; + assert(isa<Defined>(sa)); const auto *da = cast<Defined>(sa); const auto *db = cast<Defined>(sb); if (!da->isec || !db->isec) { assert(da->isAbsolute() && db->isAbsolute()); - return da->value == db->value; + return da->value + ra.addend == db->value + rb.addend; } isecA = da->isec; valueA = da->value; @@ -139,11 +152,20 @@ static bool equalsConstant(const ConcatInputSection *ia, assert(isecA->kind() == isecB->kind()); // We will compare ConcatInputSection contents in equalsVariable. if (isa<ConcatInputSection>(isecA)) - return true; + return ra.addend == rb.addend; // Else we have two literal sections. References to them are equal iff their // offsets in the output section are equal. - return isecA->getOffset(valueA + ra.addend) == - isecB->getOffset(valueB + rb.addend); + if (ra.referent.is<Symbol *>()) + // For symbol relocs, we compare the contents at the symbol address. We + // don't do `getOffset(value + addend)` because value + addend may not be + // a valid offset in the literal section. + return isecA->getOffset(valueA) == isecB->getOffset(valueB) && + ra.addend == rb.addend; + else { + assert(valueA == 0 && valueB == 0); + // For section relocs, we compare the content at the section offset. + return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend); + } }; return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f); @@ -151,10 +173,12 @@ static bool equalsConstant(const ConcatInputSection *ia, // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not // handled by equalsConstant(). -static bool equalsVariable(const ConcatInputSection *ia, - const ConcatInputSection *ib) { +bool ICF::equalsVariable(const ConcatInputSection *ia, + const ConcatInputSection *ib) { + if (verboseDiagnostics) + ++equalsVariableCount; assert(ia->relocs.size() == ib->relocs.size()); - auto f = [](const Reloc &ra, const Reloc &rb) { + auto f = [this](const Reloc &ra, const Reloc &rb) { // We already filtered out mismatching values/addends in equalsConstant. if (ra.referent == rb.referent) return true; @@ -188,9 +212,9 @@ static bool equalsVariable(const ConcatInputSection *ia, // info matches. For simplicity, we only handle the case where there are only // symbols at offset zero within the section (which is typically the case with // .subsections_via_symbols.) - auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; }; - auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU); - auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU); + auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; }; + auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasUnwind); + auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasUnwind); if (itA == ia->symbols.end()) return itB == ib->symbols.end(); if (itB == ib->symbols.end()) @@ -219,7 +243,7 @@ size_t ICF::findBoundary(size_t begin, size_t end) { // Invoke FUNC on subranges with matching equivalence class void ICF::forEachClassRange(size_t begin, size_t end, - std::function<void(size_t, size_t)> func) { + llvm::function_ref<void(size_t, size_t)> func) { while (begin < end) { size_t mid = findBoundary(begin, end); func(begin, mid); @@ -229,7 +253,7 @@ void ICF::forEachClassRange(size_t begin, size_t end, // Split icfInputs into shards, then parallelize invocation of FUNC on subranges // with matching equivalence class -void ICF::forEachClass(std::function<void(size_t, size_t)> func) { +void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) { // Only use threads when the benefits outweigh the overhead. const size_t threadingThreshold = 1024; if (icfInputs.size() < threadingThreshold) { @@ -246,10 +270,10 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> func) { size_t boundaries[shards + 1]; boundaries[0] = 0; boundaries[shards] = icfInputs.size(); - parallelForEachN(1, shards, [&](size_t i) { + parallelFor(1, shards, [&](size_t i) { boundaries[i] = findBoundary((i - 1) * step, icfInputs.size()); }); - parallelForEachN(1, shards + 1, [&](size_t i) { + parallelFor(1, shards + 1, [&](size_t i) { if (boundaries[i - 1] < boundaries[i]) { forEachClassRange(boundaries[i - 1], boundaries[i], func); } @@ -261,27 +285,28 @@ void ICF::run() { // Into each origin-section hash, combine all reloc referent section hashes. for (icfPass = 0; icfPass < 2; ++icfPass) { parallelForEach(icfInputs, [&](ConcatInputSection *isec) { - uint64_t hash = isec->icfEqClass[icfPass % 2]; + uint32_t hash = isec->icfEqClass[icfPass % 2]; for (const Reloc &r : isec->relocs) { if (auto *sym = r.referent.dyn_cast<Symbol *>()) { - if (auto *dylibSym = dyn_cast<DylibSymbol>(sym)) - hash += dylibSym->stubsHelperIndex; - else if (auto *defined = dyn_cast<Defined>(sym)) { + if (auto *defined = dyn_cast<Defined>(sym)) { if (defined->isec) { - if (auto isec = dyn_cast<ConcatInputSection>(defined->isec)) - hash += defined->value + isec->icfEqClass[icfPass % 2]; + if (auto referentIsec = + dyn_cast<ConcatInputSection>(defined->isec)) + hash += defined->value + referentIsec->icfEqClass[icfPass % 2]; else hash += defined->isec->kind() + defined->isec->getOffset(defined->value); } else { hash += defined->value; } - } else if (!isa<Undefined>(sym)) // ICF runs before Undefined diags. - llvm_unreachable("foldIdenticalSections symbol kind"); + } else { + // ICF runs before Undefined diags + assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym)); + } } } // Set MSB to 1 to avoid collisions with non-hashed classes. - isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 63); + isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31); }); } @@ -289,17 +314,22 @@ void ICF::run() { icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) { return a->icfEqClass[0] < b->icfEqClass[0]; }); - forEachClass( - [&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); }); + forEachClass([&](size_t begin, size_t end) { + segregate(begin, end, &ICF::equalsConstant); + }); // Split equivalence groups by comparing relocations until convergence do { icfRepeat = false; forEachClass([&](size_t begin, size_t end) { - segregate(begin, end, equalsVariable); + segregate(begin, end, &ICF::equalsVariable); }); } while (icfRepeat); log("ICF needed " + Twine(icfPass) + " iterations"); + if (verboseDiagnostics) { + log("equalsConstant() called " + Twine(equalsConstantCount) + " times"); + log("equalsVariable() called " + Twine(equalsVariableCount) + " times"); + } // Fold sections within equivalence classes forEachClass([&](size_t begin, size_t end) { @@ -312,18 +342,15 @@ void ICF::run() { } // Split an equivalence class into smaller classes. -void ICF::segregate( - size_t begin, size_t end, - std::function<bool(const ConcatInputSection *, const ConcatInputSection *)> - equals) { +void ICF::segregate(size_t begin, size_t end, EqualsFn equals) { while (begin < end) { // Divide [begin, end) into two. Let mid be the start index of the // second group. - auto bound = std::stable_partition(icfInputs.begin() + begin + 1, - icfInputs.begin() + end, - [&](ConcatInputSection *isec) { - return equals(icfInputs[begin], isec); - }); + auto bound = std::stable_partition( + icfInputs.begin() + begin + 1, icfInputs.begin() + end, + [&](ConcatInputSection *isec) { + return (this->*equals)(icfInputs[begin], isec); + }); size_t mid = bound - icfInputs.begin(); // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an @@ -339,6 +366,40 @@ void ICF::segregate( } } +void macho::markSymAsAddrSig(Symbol *s) { + if (auto *d = dyn_cast_or_null<Defined>(s)) + if (d->isec) + d->isec->keepUnique = true; +} + +void macho::markAddrSigSymbols() { + TimeTraceScope timeScope("Mark addrsig symbols"); + for (InputFile *file : inputFiles) { + ObjFile *obj = dyn_cast<ObjFile>(file); + if (!obj) + continue; + + Section *addrSigSection = obj->addrSigSection; + if (!addrSigSection) + continue; + assert(addrSigSection->subsections.size() == 1); + + Subsection *subSection = &addrSigSection->subsections[0]; + ArrayRef<unsigned char> &contents = subSection->isec->data; + + const uint8_t *pData = contents.begin(); + while (pData != contents.end()) { + unsigned size; + const char *err; + uint32_t symIndex = decodeULEB128(pData, &size, contents.end(), &err); + if (err) + fatal(toString(file) + ": could not decode addrsig section: " + err); + markSymAsAddrSig(obj->symbols[symIndex]); + pData += size; + } + } +} + void macho::foldIdenticalSections() { TimeTraceScope timeScope("Fold Identical Code Sections"); // The ICF equivalence-class segregation algorithm relies on pre-computed @@ -359,19 +420,42 @@ void macho::foldIdenticalSections() { uint64_t icfUniqueID = inputSections.size(); for (ConcatInputSection *isec : inputSections) { // FIXME: consider non-code __text sections as hashable? - bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) && - !isec->shouldOmitFromOutput() && isec->isHashableForICF(); + bool isHashable = (isCodeSection(isec) || isCfStringSection(isec) || + isClassRefsSection(isec)) && + !isec->keepUnique && !isec->shouldOmitFromOutput() && + sectionType(isec->getFlags()) == MachO::S_REGULAR; if (isHashable) { hashable.push_back(isec); for (Defined *d : isec->symbols) if (d->unwindEntry) hashable.push_back(d->unwindEntry); - } else { + + // __cfstring has embedded addends that foil ICF's hashing / equality + // checks. (We can ignore embedded addends when doing ICF because the same + // information gets recorded in our Reloc structs.) We therefore create a + // mutable copy of the CFString and zero out the embedded addends before + // performing any hashing / equality checks. + if (isCfStringSection(isec) || isClassRefsSection(isec)) { + // We have to do this copying serially as the BumpPtrAllocator is not + // thread-safe. FIXME: Make a thread-safe allocator. + MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc()); + for (const Reloc &r : isec->relocs) + target->relocateOne(copy.data() + r.offset, r, /*va=*/0, + /*relocVA=*/0); + isec->data = copy; + } + } else if (!isEhFrameSection(isec)) { + // EH frames are gathered as hashables from unwindEntry above; give a + // unique ID to everything else. isec->icfEqClass[0] = ++icfUniqueID; } } - parallelForEach(hashable, - [](ConcatInputSection *isec) { isec->hashForICF(); }); + parallelForEach(hashable, [](ConcatInputSection *isec) { + assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID! + // Turn-on the top bit to guarantee that valid hashes have no collisions + // with the small-integer unique IDs for ICF-ineligible sections + isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 31); + }); // Now that every input section is either hashed or marked as unique, run the // segregation algorithm to detect foldable subsections. ICF(hashable).run(); |
