diff options
Diffstat (limited to 'llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp | 368 |
1 files changed, 368 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp new file mode 100644 index 000000000000..0e49984c6ee3 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp @@ -0,0 +1,368 @@ +//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Our algorithm works by first identifying a subset of nodes that must always +// be instrumented. We call these nodes ambiguous because knowing the coverage +// of all remaining nodes is not enough to infer their coverage status. +// +// In general a node v is ambiguous if there exists two entry-to-terminal paths +// P_1 and P_2 such that: +// 1. v not in P_1 but P_1 visits a predecessor of v, and +// 2. v not in P_2 but P_2 visits a successor of v. +// +// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s +// coverage from the coverage of its predecessors, or if condition 2 fails, we +// can infer v’s coverage from the coverage of its successors. +// +// Sadly, there are example CFGs where it is not possible to infer all nodes +// from the ambiguous nodes alone. Our algorithm selects a minimum number of +// extra nodes to add to the ambiguous nodes to form a valid instrumentation S. +// +// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907 +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CRC.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "pgo-block-coverage" + +STATISTIC(NumFunctions, "Number of total functions that BCI has processed"); +STATISTIC(NumIneligibleFunctions, + "Number of functions for which BCI cannot run on"); +STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed"); +STATISTIC(NumInstrumentedBlocks, + "Number of basic blocks instrumented for coverage"); + +BlockCoverageInference::BlockCoverageInference(const Function &F, + bool ForceInstrumentEntry) + : F(F), ForceInstrumentEntry(ForceInstrumentEntry) { + findDependencies(); + assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock())); + + ++NumFunctions; + for (auto &BB : F) { + ++NumBlocks; + if (shouldInstrumentBlock(BB)) + ++NumInstrumentedBlocks; + } +} + +BlockCoverageInference::BlockSet +BlockCoverageInference::getDependencies(const BasicBlock &BB) const { + assert(BB.getParent() == &F); + BlockSet Dependencies; + auto It = PredecessorDependencies.find(&BB); + if (It != PredecessorDependencies.end()) + Dependencies.set_union(It->second); + It = SuccessorDependencies.find(&BB); + if (It != SuccessorDependencies.end()) + Dependencies.set_union(It->second); + return Dependencies; +} + +uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const { + JamCRC JC; + uint64_t Index = 0; + for (auto &BB : F) { + if (shouldInstrumentBlock(BB)) { + uint8_t Data[8]; + support::endian::write64le(Data, Index); + JC.update(Data); + } + Index++; + } + return JC.getCRC(); +} + +bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const { + assert(BB.getParent() == &F); + auto It = PredecessorDependencies.find(&BB); + if (It != PredecessorDependencies.end() && It->second.size()) + return false; + It = SuccessorDependencies.find(&BB); + if (It != SuccessorDependencies.end() && It->second.size()) + return false; + return true; +} + +void BlockCoverageInference::findDependencies() { + assert(PredecessorDependencies.empty() && SuccessorDependencies.empty()); + // Empirical analysis shows that this algorithm finishes within 5 seconds for + // functions with fewer than 1.5K blocks. + if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) { + ++NumIneligibleFunctions; + return; + } + + SmallVector<const BasicBlock *, 4> TerminalBlocks; + for (auto &BB : F) + if (succ_empty(&BB)) + TerminalBlocks.push_back(&BB); + + // Traverse the CFG backwards from the terminal blocks to make sure every + // block can reach some terminal block. Otherwise this algorithm will not work + // and we must fall back to instrumenting every block. + df_iterator_default_set<const BasicBlock *> Visited; + for (auto *BB : TerminalBlocks) + for (auto *N : inverse_depth_first_ext(BB, Visited)) + (void)N; + if (F.size() != Visited.size()) { + ++NumIneligibleFunctions; + return; + } + + // The current implementation for computing `PredecessorDependencies` and + // `SuccessorDependencies` runs in quadratic time with respect to the number + // of basic blocks. While we do have a more complicated linear time algorithm + // in https://arxiv.org/abs/2208.13907 we do not know if it will give a + // significant speedup in practice given that most functions tend to be + // relatively small in size for intended use cases. + auto &EntryBlock = F.getEntryBlock(); + for (auto &BB : F) { + // The set of blocks that are reachable while avoiding BB. + BlockSet ReachableFromEntry, ReachableFromTerminal; + getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true, + ReachableFromEntry); + for (auto *TerminalBlock : TerminalBlocks) + getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false, + ReachableFromTerminal); + + auto Preds = predecessors(&BB); + bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) { + return ReachableFromEntry.count(Pred) && + ReachableFromTerminal.count(Pred); + }); + if (!HasSuperReachablePred) + for (auto *Pred : Preds) + if (ReachableFromEntry.count(Pred)) + PredecessorDependencies[&BB].insert(Pred); + + auto Succs = successors(&BB); + bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) { + return ReachableFromEntry.count(Succ) && + ReachableFromTerminal.count(Succ); + }); + if (!HasSuperReachableSucc) + for (auto *Succ : Succs) + if (ReachableFromTerminal.count(Succ)) + SuccessorDependencies[&BB].insert(Succ); + } + + if (ForceInstrumentEntry) { + // Force the entry block to be instrumented by clearing the blocks it can + // infer coverage from. + PredecessorDependencies[&EntryBlock].clear(); + SuccessorDependencies[&EntryBlock].clear(); + } + + // Construct a graph where blocks are connected if there is a mutual + // dependency between them. This graph has a special property that it contains + // only paths. + DenseMap<const BasicBlock *, BlockSet> AdjacencyList; + for (auto &BB : F) { + for (auto *Succ : successors(&BB)) { + if (SuccessorDependencies[&BB].count(Succ) && + PredecessorDependencies[Succ].count(&BB)) { + AdjacencyList[&BB].insert(Succ); + AdjacencyList[Succ].insert(&BB); + } + } + } + + // Given a path with at least one node, return the next node on the path. + auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * { + assert(Path.size()); + auto &Neighbors = AdjacencyList[Path.back()]; + if (Path.size() == 1) { + // This is the first node on the path, return its neighbor. + assert(Neighbors.size() == 1); + return Neighbors.front(); + } else if (Neighbors.size() == 2) { + // This is the middle of the path, find the neighbor that is not on the + // path already. + assert(Path.size() >= 2); + return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0]; + } + // This is the end of the path. + assert(Neighbors.size() == 1); + return nullptr; + }; + + // Remove all cycles in the inferencing graph. + for (auto &BB : F) { + if (AdjacencyList[&BB].size() == 1) { + // We found the head of some path. + BlockSet Path; + Path.insert(&BB); + while (const BasicBlock *Next = getNextOnPath(Path)) + Path.insert(Next); + LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n"); + + // Remove these nodes from the graph so we don't discover this path again. + for (auto *BB : Path) + AdjacencyList[BB].clear(); + + // Finally, remove the cycles. + if (PredecessorDependencies[Path.front()].size()) { + for (auto *BB : Path) + if (BB != Path.back()) + SuccessorDependencies[BB].clear(); + } else { + for (auto *BB : Path) + if (BB != Path.front()) + PredecessorDependencies[BB].clear(); + } + } + } + LLVM_DEBUG(dump(dbgs())); +} + +void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start, + const BasicBlock &Avoid, + bool IsForward, + BlockSet &Reachable) const { + df_iterator_default_set<const BasicBlock *> Visited; + Visited.insert(&Avoid); + if (IsForward) { + auto Range = depth_first_ext(&Start, Visited); + Reachable.insert(Range.begin(), Range.end()); + } else { + auto Range = inverse_depth_first_ext(&Start, Visited); + Reachable.insert(Range.begin(), Range.end()); + } +} + +namespace llvm { +class DotFuncBCIInfo { +private: + const BlockCoverageInference *BCI; + const DenseMap<const BasicBlock *, bool> *Coverage; + +public: + DotFuncBCIInfo(const BlockCoverageInference *BCI, + const DenseMap<const BasicBlock *, bool> *Coverage) + : BCI(BCI), Coverage(Coverage) {} + + const Function &getFunction() { return BCI->F; } + + bool isInstrumented(const BasicBlock *BB) const { + return BCI->shouldInstrumentBlock(*BB); + } + + bool isCovered(const BasicBlock *BB) const { + return Coverage && Coverage->lookup(BB); + } + + bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const { + return BCI->getDependencies(*Src).count(Dest); + } +}; + +template <> +struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> { + static NodeRef getEntryNode(DotFuncBCIInfo *Info) { + return &(Info->getFunction().getEntryBlock()); + } + + // nodes_iterator/begin/end - Allow iteration over all nodes in the graph + using nodes_iterator = pointer_iterator<Function::const_iterator>; + + static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) { + return nodes_iterator(Info->getFunction().begin()); + } + + static nodes_iterator nodes_end(DotFuncBCIInfo *Info) { + return nodes_iterator(Info->getFunction().end()); + } + + static size_t size(DotFuncBCIInfo *Info) { + return Info->getFunction().size(); + } +}; + +template <> +struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits { + + DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} + + static std::string getGraphName(DotFuncBCIInfo *Info) { + return "BCI CFG for " + Info->getFunction().getName().str(); + } + + std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) { + return Node->getName().str(); + } + + std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I, + DotFuncBCIInfo *Info) { + const BasicBlock *Dest = *I; + if (Info->isDependent(Src, Dest)) + return "color=red"; + if (Info->isDependent(Dest, Src)) + return "color=blue"; + return ""; + } + + std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) { + std::string Result; + if (Info->isInstrumented(Node)) + Result += "style=filled,fillcolor=gray"; + if (Info->isCovered(Node)) + Result += std::string(Result.empty() ? "" : ",") + "color=red"; + return Result; + } +}; + +} // namespace llvm + +void BlockCoverageInference::viewBlockCoverageGraph( + const DenseMap<const BasicBlock *, bool> *Coverage) const { + DotFuncBCIInfo Info(this, Coverage); + WriteGraph(&Info, "BCI", false, + "Block Coverage Inference for " + F.getName()); +} + +void BlockCoverageInference::dump(raw_ostream &OS) const { + OS << "Minimal block coverage for function \'" << F.getName() + << "\' (Instrumented=*)\n"; + for (auto &BB : F) { + OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n"; + auto It = PredecessorDependencies.find(&BB); + if (It != PredecessorDependencies.end() && It->second.size()) + OS << " PredDeps = " << getBlockNames(It->second) << "\n"; + It = SuccessorDependencies.find(&BB); + if (It != SuccessorDependencies.end() && It->second.size()) + OS << " SuccDeps = " << getBlockNames(It->second) << "\n"; + } + OS << " Instrumented Blocks Hash = 0x" + << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n"; +} + +std::string +BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) { + std::string Result; + raw_string_ostream OS(Result); + OS << "["; + if (!BBs.empty()) { + OS << BBs.front()->getName(); + BBs = BBs.drop_front(); + } + for (auto *BB : BBs) + OS << ", " << BB->getName(); + OS << "]"; + return OS.str(); +} |
