aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp')
-rw-r--r--llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp368
1 files changed, 368 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
new file mode 100644
index 000000000000..0e49984c6ee3
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
@@ -0,0 +1,368 @@
+//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Our algorithm works by first identifying a subset of nodes that must always
+// be instrumented. We call these nodes ambiguous because knowing the coverage
+// of all remaining nodes is not enough to infer their coverage status.
+//
+// In general a node v is ambiguous if there exists two entry-to-terminal paths
+// P_1 and P_2 such that:
+// 1. v not in P_1 but P_1 visits a predecessor of v, and
+// 2. v not in P_2 but P_2 visits a successor of v.
+//
+// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
+// coverage from the coverage of its predecessors, or if condition 2 fails, we
+// can infer v’s coverage from the coverage of its successors.
+//
+// Sadly, there are example CFGs where it is not possible to infer all nodes
+// from the ambiguous nodes alone. Our algorithm selects a minimum number of
+// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
+//
+// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-block-coverage"
+
+STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
+STATISTIC(NumIneligibleFunctions,
+ "Number of functions for which BCI cannot run on");
+STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
+STATISTIC(NumInstrumentedBlocks,
+ "Number of basic blocks instrumented for coverage");
+
+BlockCoverageInference::BlockCoverageInference(const Function &F,
+ bool ForceInstrumentEntry)
+ : F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
+ findDependencies();
+ assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
+
+ ++NumFunctions;
+ for (auto &BB : F) {
+ ++NumBlocks;
+ if (shouldInstrumentBlock(BB))
+ ++NumInstrumentedBlocks;
+ }
+}
+
+BlockCoverageInference::BlockSet
+BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
+ assert(BB.getParent() == &F);
+ BlockSet Dependencies;
+ auto It = PredecessorDependencies.find(&BB);
+ if (It != PredecessorDependencies.end())
+ Dependencies.set_union(It->second);
+ It = SuccessorDependencies.find(&BB);
+ if (It != SuccessorDependencies.end())
+ Dependencies.set_union(It->second);
+ return Dependencies;
+}
+
+uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
+ JamCRC JC;
+ uint64_t Index = 0;
+ for (auto &BB : F) {
+ if (shouldInstrumentBlock(BB)) {
+ uint8_t Data[8];
+ support::endian::write64le(Data, Index);
+ JC.update(Data);
+ }
+ Index++;
+ }
+ return JC.getCRC();
+}
+
+bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
+ assert(BB.getParent() == &F);
+ auto It = PredecessorDependencies.find(&BB);
+ if (It != PredecessorDependencies.end() && It->second.size())
+ return false;
+ It = SuccessorDependencies.find(&BB);
+ if (It != SuccessorDependencies.end() && It->second.size())
+ return false;
+ return true;
+}
+
+void BlockCoverageInference::findDependencies() {
+ assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
+ // Empirical analysis shows that this algorithm finishes within 5 seconds for
+ // functions with fewer than 1.5K blocks.
+ if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
+ ++NumIneligibleFunctions;
+ return;
+ }
+
+ SmallVector<const BasicBlock *, 4> TerminalBlocks;
+ for (auto &BB : F)
+ if (succ_empty(&BB))
+ TerminalBlocks.push_back(&BB);
+
+ // Traverse the CFG backwards from the terminal blocks to make sure every
+ // block can reach some terminal block. Otherwise this algorithm will not work
+ // and we must fall back to instrumenting every block.
+ df_iterator_default_set<const BasicBlock *> Visited;
+ for (auto *BB : TerminalBlocks)
+ for (auto *N : inverse_depth_first_ext(BB, Visited))
+ (void)N;
+ if (F.size() != Visited.size()) {
+ ++NumIneligibleFunctions;
+ return;
+ }
+
+ // The current implementation for computing `PredecessorDependencies` and
+ // `SuccessorDependencies` runs in quadratic time with respect to the number
+ // of basic blocks. While we do have a more complicated linear time algorithm
+ // in https://arxiv.org/abs/2208.13907 we do not know if it will give a
+ // significant speedup in practice given that most functions tend to be
+ // relatively small in size for intended use cases.
+ auto &EntryBlock = F.getEntryBlock();
+ for (auto &BB : F) {
+ // The set of blocks that are reachable while avoiding BB.
+ BlockSet ReachableFromEntry, ReachableFromTerminal;
+ getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
+ ReachableFromEntry);
+ for (auto *TerminalBlock : TerminalBlocks)
+ getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
+ ReachableFromTerminal);
+
+ auto Preds = predecessors(&BB);
+ bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
+ return ReachableFromEntry.count(Pred) &&
+ ReachableFromTerminal.count(Pred);
+ });
+ if (!HasSuperReachablePred)
+ for (auto *Pred : Preds)
+ if (ReachableFromEntry.count(Pred))
+ PredecessorDependencies[&BB].insert(Pred);
+
+ auto Succs = successors(&BB);
+ bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
+ return ReachableFromEntry.count(Succ) &&
+ ReachableFromTerminal.count(Succ);
+ });
+ if (!HasSuperReachableSucc)
+ for (auto *Succ : Succs)
+ if (ReachableFromTerminal.count(Succ))
+ SuccessorDependencies[&BB].insert(Succ);
+ }
+
+ if (ForceInstrumentEntry) {
+ // Force the entry block to be instrumented by clearing the blocks it can
+ // infer coverage from.
+ PredecessorDependencies[&EntryBlock].clear();
+ SuccessorDependencies[&EntryBlock].clear();
+ }
+
+ // Construct a graph where blocks are connected if there is a mutual
+ // dependency between them. This graph has a special property that it contains
+ // only paths.
+ DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
+ for (auto &BB : F) {
+ for (auto *Succ : successors(&BB)) {
+ if (SuccessorDependencies[&BB].count(Succ) &&
+ PredecessorDependencies[Succ].count(&BB)) {
+ AdjacencyList[&BB].insert(Succ);
+ AdjacencyList[Succ].insert(&BB);
+ }
+ }
+ }
+
+ // Given a path with at least one node, return the next node on the path.
+ auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
+ assert(Path.size());
+ auto &Neighbors = AdjacencyList[Path.back()];
+ if (Path.size() == 1) {
+ // This is the first node on the path, return its neighbor.
+ assert(Neighbors.size() == 1);
+ return Neighbors.front();
+ } else if (Neighbors.size() == 2) {
+ // This is the middle of the path, find the neighbor that is not on the
+ // path already.
+ assert(Path.size() >= 2);
+ return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
+ }
+ // This is the end of the path.
+ assert(Neighbors.size() == 1);
+ return nullptr;
+ };
+
+ // Remove all cycles in the inferencing graph.
+ for (auto &BB : F) {
+ if (AdjacencyList[&BB].size() == 1) {
+ // We found the head of some path.
+ BlockSet Path;
+ Path.insert(&BB);
+ while (const BasicBlock *Next = getNextOnPath(Path))
+ Path.insert(Next);
+ LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
+
+ // Remove these nodes from the graph so we don't discover this path again.
+ for (auto *BB : Path)
+ AdjacencyList[BB].clear();
+
+ // Finally, remove the cycles.
+ if (PredecessorDependencies[Path.front()].size()) {
+ for (auto *BB : Path)
+ if (BB != Path.back())
+ SuccessorDependencies[BB].clear();
+ } else {
+ for (auto *BB : Path)
+ if (BB != Path.front())
+ PredecessorDependencies[BB].clear();
+ }
+ }
+ }
+ LLVM_DEBUG(dump(dbgs()));
+}
+
+void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
+ const BasicBlock &Avoid,
+ bool IsForward,
+ BlockSet &Reachable) const {
+ df_iterator_default_set<const BasicBlock *> Visited;
+ Visited.insert(&Avoid);
+ if (IsForward) {
+ auto Range = depth_first_ext(&Start, Visited);
+ Reachable.insert(Range.begin(), Range.end());
+ } else {
+ auto Range = inverse_depth_first_ext(&Start, Visited);
+ Reachable.insert(Range.begin(), Range.end());
+ }
+}
+
+namespace llvm {
+class DotFuncBCIInfo {
+private:
+ const BlockCoverageInference *BCI;
+ const DenseMap<const BasicBlock *, bool> *Coverage;
+
+public:
+ DotFuncBCIInfo(const BlockCoverageInference *BCI,
+ const DenseMap<const BasicBlock *, bool> *Coverage)
+ : BCI(BCI), Coverage(Coverage) {}
+
+ const Function &getFunction() { return BCI->F; }
+
+ bool isInstrumented(const BasicBlock *BB) const {
+ return BCI->shouldInstrumentBlock(*BB);
+ }
+
+ bool isCovered(const BasicBlock *BB) const {
+ return Coverage && Coverage->lookup(BB);
+ }
+
+ bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
+ return BCI->getDependencies(*Src).count(Dest);
+ }
+};
+
+template <>
+struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
+ static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
+ return &(Info->getFunction().getEntryBlock());
+ }
+
+ // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+ using nodes_iterator = pointer_iterator<Function::const_iterator>;
+
+ static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
+ return nodes_iterator(Info->getFunction().begin());
+ }
+
+ static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
+ return nodes_iterator(Info->getFunction().end());
+ }
+
+ static size_t size(DotFuncBCIInfo *Info) {
+ return Info->getFunction().size();
+ }
+};
+
+template <>
+struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
+
+ static std::string getGraphName(DotFuncBCIInfo *Info) {
+ return "BCI CFG for " + Info->getFunction().getName().str();
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
+ return Node->getName().str();
+ }
+
+ std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
+ DotFuncBCIInfo *Info) {
+ const BasicBlock *Dest = *I;
+ if (Info->isDependent(Src, Dest))
+ return "color=red";
+ if (Info->isDependent(Dest, Src))
+ return "color=blue";
+ return "";
+ }
+
+ std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
+ std::string Result;
+ if (Info->isInstrumented(Node))
+ Result += "style=filled,fillcolor=gray";
+ if (Info->isCovered(Node))
+ Result += std::string(Result.empty() ? "" : ",") + "color=red";
+ return Result;
+ }
+};
+
+} // namespace llvm
+
+void BlockCoverageInference::viewBlockCoverageGraph(
+ const DenseMap<const BasicBlock *, bool> *Coverage) const {
+ DotFuncBCIInfo Info(this, Coverage);
+ WriteGraph(&Info, "BCI", false,
+ "Block Coverage Inference for " + F.getName());
+}
+
+void BlockCoverageInference::dump(raw_ostream &OS) const {
+ OS << "Minimal block coverage for function \'" << F.getName()
+ << "\' (Instrumented=*)\n";
+ for (auto &BB : F) {
+ OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n";
+ auto It = PredecessorDependencies.find(&BB);
+ if (It != PredecessorDependencies.end() && It->second.size())
+ OS << " PredDeps = " << getBlockNames(It->second) << "\n";
+ It = SuccessorDependencies.find(&BB);
+ if (It != SuccessorDependencies.end() && It->second.size())
+ OS << " SuccDeps = " << getBlockNames(It->second) << "\n";
+ }
+ OS << " Instrumented Blocks Hash = 0x"
+ << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
+}
+
+std::string
+BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "[";
+ if (!BBs.empty()) {
+ OS << BBs.front()->getName();
+ BBs = BBs.drop_front();
+ }
+ for (auto *BB : BBs)
+ OS << ", " << BB->getName();
+ OS << "]";
+ return OS.str();
+}