summaryrefslogtreecommitdiff
path: root/COFF/ICF.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'COFF/ICF.cpp')
-rw-r--r--COFF/ICF.cpp244
1 files changed, 244 insertions, 0 deletions
diff --git a/COFF/ICF.cpp b/COFF/ICF.cpp
new file mode 100644
index 000000000000..f99b41624a84
--- /dev/null
+++ b/COFF/ICF.cpp
@@ -0,0 +1,244 @@
+//===- ICF.cpp ------------------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Identical COMDAT Folding is a feature to merge COMDAT sections not by
+// name (which is regular COMDAT handling) but by contents. If two COMDAT
+// sections have the same data, relocations, attributes, etc., then the two
+// are considered identical and merged by the linker. This optimization
+// makes outputs smaller.
+//
+// ICF is theoretically a problem of reducing graphs by merging as many
+// identical subgraphs as possible, if we consider sections as vertices and
+// relocations as edges. This may be a bit more complicated problem than you
+// might think. The order of processing sections matters since merging two
+// sections can make other sections, whose relocations now point to the same
+// section, mergeable. Graphs may contain cycles, which is common in COFF.
+// We need a sophisticated algorithm to do this properly and efficiently.
+//
+// What we do in this file is this. We split sections into groups. Sections
+// in the same group are considered identical.
+//
+// First, all sections are grouped by their "constant" values. Constant
+// values are values that are never changed by ICF, such as section contents,
+// section name, number of relocations, type and offset of each relocation,
+// etc. Because we do not care about some relocation targets in this step,
+// two sections in the same group may not be identical, but at least two
+// sections in different groups can never be identical.
+//
+// Then, we try to split each group by relocation targets. Relocations are
+// considered identical if and only if the relocation targets are in the
+// same group. Splitting a group may make more groups to be splittable,
+// because two relocations that were previously considered identical might
+// now point to different groups. We repeat this step until the convergence
+// is obtained.
+//
+// This algorithm is so-called "optimistic" algorithm described in
+// http://research.google.com/pubs/pub36912.html.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Chunks.h"
+#include "Symbols.h"
+#include "lld/Core/Parallel.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <atomic>
+#include <vector>
+
+using namespace llvm;
+
+namespace lld {
+namespace coff {
+
+typedef std::vector<SectionChunk *>::iterator ChunkIterator;
+typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *);
+
+class ICF {
+public:
+ void run(const std::vector<Chunk *> &V);
+
+private:
+ static uint64_t getHash(SectionChunk *C);
+ static bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
+ static bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
+ bool forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq);
+ bool partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq);
+
+ std::atomic<uint64_t> NextID = { 1 };
+};
+
+// Entry point to ICF.
+void doICF(const std::vector<Chunk *> &Chunks) {
+ ICF().run(Chunks);
+}
+
+uint64_t ICF::getHash(SectionChunk *C) {
+ return hash_combine(C->getPermissions(),
+ hash_value(C->SectionName),
+ C->NumRelocs,
+ C->getAlign(),
+ uint32_t(C->Header->SizeOfRawData),
+ C->Checksum);
+}
+
+bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
+ if (A->AssocChildren.size() != B->AssocChildren.size() ||
+ A->NumRelocs != B->NumRelocs) {
+ return false;
+ }
+
+ // Compare associative sections.
+ for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I)
+ if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID)
+ return false;
+
+ // Compare relocations.
+ auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
+ if (R1.Type != R2.Type ||
+ R1.VirtualAddress != R2.VirtualAddress) {
+ return false;
+ }
+ SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl();
+ SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl();
+ if (B1 == B2)
+ return true;
+ if (auto *D1 = dyn_cast<DefinedRegular>(B1))
+ if (auto *D2 = dyn_cast<DefinedRegular>(B2))
+ return D1->getValue() == D2->getValue() &&
+ D1->getChunk()->GroupID == D2->getChunk()->GroupID;
+ return false;
+ };
+ if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq))
+ return false;
+
+ // Compare section attributes and contents.
+ return A->getPermissions() == B->getPermissions() &&
+ A->SectionName == B->SectionName &&
+ A->getAlign() == B->getAlign() &&
+ A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
+ A->Checksum == B->Checksum &&
+ A->getContents() == B->getContents();
+}
+
+bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
+ // Compare associative sections.
+ for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I)
+ if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID)
+ return false;
+
+ // Compare relocations.
+ auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
+ SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl();
+ SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl();
+ if (B1 == B2)
+ return true;
+ if (auto *D1 = dyn_cast<DefinedRegular>(B1))
+ if (auto *D2 = dyn_cast<DefinedRegular>(B2))
+ return D1->getChunk()->GroupID == D2->getChunk()->GroupID;
+ return false;
+ };
+ return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq);
+}
+
+bool ICF::partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq) {
+ bool R = false;
+ for (auto It = Begin;;) {
+ SectionChunk *Head = *It;
+ auto Bound = std::partition(It + 1, End, [&](SectionChunk *SC) {
+ return Eq(Head, SC);
+ });
+ if (Bound == End)
+ return R;
+ uint64_t ID = NextID++;
+ std::for_each(It, Bound, [&](SectionChunk *SC) { SC->GroupID = ID; });
+ It = Bound;
+ R = true;
+ }
+}
+
+bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) {
+ bool R = false;
+ for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) {
+ SectionChunk *Head = *It;
+ auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) {
+ return SC->GroupID != Head->GroupID;
+ });
+ if (partition(It, Bound, Eq))
+ R = true;
+ It = Bound;
+ }
+ return R;
+}
+
+// Merge identical COMDAT sections.
+// Two sections are considered the same if their section headers,
+// contents and relocations are all the same.
+void ICF::run(const std::vector<Chunk *> &Vec) {
+ // Collect only mergeable sections and group by hash value.
+ parallel_for_each(Vec.begin(), Vec.end(), [&](Chunk *C) {
+ if (auto *SC = dyn_cast<SectionChunk>(C)) {
+ bool Global = SC->Sym && SC->Sym->isExternal();
+ bool Writable = SC->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
+ if (SC->isCOMDAT() && SC->isLive() && Global && !Writable)
+ SC->GroupID = getHash(SC) | (uint64_t(1) << 63);
+ }
+ });
+ std::vector<SectionChunk *> Chunks;
+ for (Chunk *C : Vec) {
+ if (auto *SC = dyn_cast<SectionChunk>(C)) {
+ if (SC->GroupID) {
+ Chunks.push_back(SC);
+ } else {
+ SC->GroupID = NextID++;
+ }
+ }
+ }
+
+ // From now on, sections in Chunks are ordered so that sections in
+ // the same group are consecutive in the vector.
+ std::sort(Chunks.begin(), Chunks.end(),
+ [](SectionChunk *A, SectionChunk *B) {
+ return A->GroupID < B->GroupID;
+ });
+
+ // Split groups until we get a convergence.
+ int Cnt = 1;
+ forEachGroup(Chunks, equalsConstant);
+
+ for (;;) {
+ if (!forEachGroup(Chunks, equalsVariable))
+ break;
+ ++Cnt;
+ }
+ if (Config->Verbose)
+ llvm::outs() << "\nICF needed " << Cnt << " iterations.\n";
+
+ // Merge sections in the same group.
+ for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) {
+ SectionChunk *Head = *It++;
+ auto Bound = std::find_if(It, End, [&](SectionChunk *SC) {
+ return Head->GroupID != SC->GroupID;
+ });
+ if (It == Bound)
+ continue;
+ if (Config->Verbose)
+ llvm::outs() << "Selected " << Head->getDebugName() << "\n";
+ while (It != Bound) {
+ SectionChunk *SC = *It++;
+ if (Config->Verbose)
+ llvm::outs() << " Removed " << SC->getDebugName() << "\n";
+ Head->replace(SC);
+ }
+ }
+}
+
+} // namespace coff
+} // namespace lld