Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp | 1256 |
1 file changed, 1256 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp new file mode 100644 index 000000000000..5e92b9852a9f --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -0,0 +1,1256 @@ +//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a profile inference algorithm. Given an incomplete and +// possibly imprecise block counts, the algorithm reconstructs realistic block +// and edge counts that satisfy flow conservation rules, while minimally modify +// input block counts. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SampleProfileInference.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include <queue> +#include <set> +#include <stack> + +using namespace llvm; +#define DEBUG_TYPE "sample-profile-inference" + +namespace { + +static cl::opt<bool> SampleProfileEvenCountDistribution( +    "sample-profile-even-count-distribution", cl::init(true), cl::Hidden, +    cl::desc("Try to evenly distribute counts when there are multiple equally " +             "likely options.")); + +static cl::opt<unsigned> SampleProfileMaxDfsCalls( +    "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden, +    cl::desc("Maximum number of dfs iterations for even count distribution.")); + +static cl::opt<unsigned> SampleProfileProfiCostInc( +    "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden, +    cl::desc("A cost of increasing a block's count by one.")); + +static cl::opt<unsigned> SampleProfileProfiCostDec( +    "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden, +    cl::desc("A cost of decreasing a block's count by one.")); + +static cl::opt<unsigned> SampleProfileProfiCostIncZero( +    "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden, +    cl::desc("A cost of increasing a count of zero-weight block by one.")); + +static cl::opt<unsigned> SampleProfileProfiCostIncEntry( +    "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden, +    cl::desc("A cost of increasing the entry block's count by one.")); + +static cl::opt<unsigned> SampleProfileProfiCostDecEntry( +    "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden, +    cl::desc("A cost of decreasing the entry block's count by one.")); + +/// A value indicating an infinite flow/capacity/weight of a block/edge. +/// Not using numeric_limits<int64_t>::max(), as the values can be summed up +/// during the execution. +static constexpr int64_t INF = ((int64_t)1) << 50; + +/// The minimum-cost maximum flow algorithm. +/// +/// The algorithm finds the maximum flow of minimum cost on a given (directed) +/// network using a modified version of the classical Moore-Bellman-Ford +/// approach. The algorithm applies a number of augmentation iterations in which +/// flow is sent along paths of positive capacity from the source to the sink. 
+/// The worst-case time complexity of the implementation is O(v(f)*m*n), where +/// where m is the number of edges, n is the number of vertices, and v(f) is the +/// value of the maximum flow. However, the observed running time on typical +/// instances is sub-quadratic, that is, o(n^2). +/// +/// The input is a set of edges with specified costs and capacities, and a pair +/// of nodes (source and sink). The output is the flow along each edge of the +/// minimum total cost respecting the given edge capacities. +class MinCostMaxFlow { +public: +  // Initialize algorithm's data structures for a network of a given size. +  void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { +    Source = SourceNode; +    Target = SinkNode; + +    Nodes = std::vector<Node>(NodeCount); +    Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); +    if (SampleProfileEvenCountDistribution) +      AugmentingEdges = +          std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>()); +  } + +  // Run the algorithm. +  int64_t run() { +    // Iteratively find an augmentation path/dag in the network and send the +    // flow along its edges +    size_t AugmentationIters = applyFlowAugmentation(); + +    // Compute the total flow and its cost +    int64_t TotalCost = 0; +    int64_t TotalFlow = 0; +    for (uint64_t Src = 0; Src < Nodes.size(); Src++) { +      for (auto &Edge : Edges[Src]) { +        if (Edge.Flow > 0) { +          TotalCost += Edge.Cost * Edge.Flow; +          if (Src == Source) +            TotalFlow += Edge.Flow; +        } +      } +    } +    LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters +                      << " iterations with " << TotalFlow << " total flow" +                      << " of " << TotalCost << " cost\n"); +    (void)TotalFlow; +    (void)AugmentationIters; +    return TotalCost; +  } + +  /// Adding an edge to the network with a specified capacity and a cost. +  /// Multiple edges between a pair of nodes are allowed but self-edges +  /// are not supported. +  void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { +    assert(Capacity > 0 && "adding an edge of zero capacity"); +    assert(Src != Dst && "loop edge are not supported"); + +    Edge SrcEdge; +    SrcEdge.Dst = Dst; +    SrcEdge.Cost = Cost; +    SrcEdge.Capacity = Capacity; +    SrcEdge.Flow = 0; +    SrcEdge.RevEdgeIndex = Edges[Dst].size(); + +    Edge DstEdge; +    DstEdge.Dst = Src; +    DstEdge.Cost = -Cost; +    DstEdge.Capacity = 0; +    DstEdge.Flow = 0; +    DstEdge.RevEdgeIndex = Edges[Src].size(); + +    Edges[Src].push_back(SrcEdge); +    Edges[Dst].push_back(DstEdge); +  } + +  /// Adding an edge to the network of infinite capacity and a given cost. +  void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { +    addEdge(Src, Dst, INF, Cost); +  } + +  /// Get the total flow from a given source node. +  /// Returns a list of pairs (target node, amount of flow to the target). +  const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { +    std::vector<std::pair<uint64_t, int64_t>> Flow; +    for (auto &Edge : Edges[Src]) { +      if (Edge.Flow > 0) +        Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); +    } +    return Flow; +  } + +  /// Get the total flow between a pair of nodes. 
+  int64_t getFlow(uint64_t Src, uint64_t Dst) const { +    int64_t Flow = 0; +    for (auto &Edge : Edges[Src]) { +      if (Edge.Dst == Dst) { +        Flow += Edge.Flow; +      } +    } +    return Flow; +  } + +  /// A cost of taking an unlikely jump. +  static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; +  /// Minimum BaseDistance for the jump distance values in island joining. +  static constexpr uint64_t MinBaseDistance = 10000; + +private: +  /// Iteratively find an augmentation path/dag in the network and send the +  /// flow along its edges. The method returns the number of applied iterations. +  size_t applyFlowAugmentation() { +    size_t AugmentationIters = 0; +    while (findAugmentingPath()) { +      uint64_t PathCapacity = computeAugmentingPathCapacity(); +      while (PathCapacity > 0) { +        bool Progress = false; +        if (SampleProfileEvenCountDistribution) { +          // Identify node/edge candidates for augmentation +          identifyShortestEdges(PathCapacity); + +          // Find an augmenting DAG +          auto AugmentingOrder = findAugmentingDAG(); + +          // Apply the DAG augmentation +          Progress = augmentFlowAlongDAG(AugmentingOrder); +          PathCapacity = computeAugmentingPathCapacity(); +        } + +        if (!Progress) { +          augmentFlowAlongPath(PathCapacity); +          PathCapacity = 0; +        } + +        AugmentationIters++; +      } +    } +    return AugmentationIters; +  } + +  /// Compute the capacity of the cannonical augmenting path. If the path is +  /// saturated (that is, no flow can be sent along the path), then return 0. +  uint64_t computeAugmentingPathCapacity() { +    uint64_t PathCapacity = INF; +    uint64_t Now = Target; +    while (Now != Source) { +      uint64_t Pred = Nodes[Now].ParentNode; +      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; + +      assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow"); +      uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow); +      PathCapacity = std::min(PathCapacity, EdgeCapacity); + +      Now = Pred; +    } +    return PathCapacity; +  } + +  /// Check for existence of an augmenting path with a positive capacity. +  bool findAugmentingPath() { +    // Initialize data structures +    for (auto &Node : Nodes) { +      Node.Distance = INF; +      Node.ParentNode = uint64_t(-1); +      Node.ParentEdgeIndex = uint64_t(-1); +      Node.Taken = false; +    } + +    std::queue<uint64_t> Queue; +    Queue.push(Source); +    Nodes[Source].Distance = 0; +    Nodes[Source].Taken = true; +    while (!Queue.empty()) { +      uint64_t Src = Queue.front(); +      Queue.pop(); +      Nodes[Src].Taken = false; +      // Although the residual network contains edges with negative costs +      // (in particular, backward edges), it can be shown that there are no +      // negative-weight cycles and the following two invariants are maintained: +      // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, +      // where Dist is the length of the shortest path between two nodes. 
This +      // allows to prune the search-space of the path-finding algorithm using +      // the following early-stop criteria: +      // -- If we find a path with zero-distance from Source to Target, stop the +      //    search, as the path is the shortest since Dist[Source, Target] >= 0; +      // -- If we have Dist[Source, V] > Dist[Source, Target], then do not +      //    process node V, as it is guaranteed _not_ to be on a shortest path +      //    from Source to Target; it follows from inequalities +      //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] +      //                         >= Dist[Source, V] +      if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0) +        break; +      if (Nodes[Src].Distance > Nodes[Target].Distance) +        continue; + +      // Process adjacent edges +      for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { +        auto &Edge = Edges[Src][EdgeIdx]; +        if (Edge.Flow < Edge.Capacity) { +          uint64_t Dst = Edge.Dst; +          int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; +          if (Nodes[Dst].Distance > NewDistance) { +            // Update the distance and the parent node/edge +            Nodes[Dst].Distance = NewDistance; +            Nodes[Dst].ParentNode = Src; +            Nodes[Dst].ParentEdgeIndex = EdgeIdx; +            // Add the node to the queue, if it is not there yet +            if (!Nodes[Dst].Taken) { +              Queue.push(Dst); +              Nodes[Dst].Taken = true; +            } +          } +        } +      } +    } + +    return Nodes[Target].Distance != INF; +  } + +  /// Update the current flow along the augmenting path. +  void augmentFlowAlongPath(uint64_t PathCapacity) { +    assert(PathCapacity > 0 && "found an incorrect augmenting path"); +    uint64_t Now = Target; +    while (Now != Source) { +      uint64_t Pred = Nodes[Now].ParentNode; +      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; +      auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; + +      Edge.Flow += PathCapacity; +      RevEdge.Flow -= PathCapacity; + +      Now = Pred; +    } +  } + +  /// Find an Augmenting DAG order using a modified version of DFS in which we +  /// can visit a node multiple times. In the DFS search, when scanning each +  /// edge out of a node, continue search at Edge.Dst endpoint if it has not +  /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm +  /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time. +  /// It returns an Augmenting Order (Taken nodes in decreasing Finish time) +  /// that starts with Source and ends with Target. +  std::vector<uint64_t> findAugmentingDAG() { +    // We use a stack based implemenation of DFS to avoid recursion. 
+    // Defining DFS data structures: +    // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that +    //  - we are currently visiting Nodes[NodeIdx] and +    //  - the next edge to scan is Edges[NodeIdx][EdgeIdx] +    typedef std::pair<uint64_t, uint64_t> StackItemType; +    std::stack<StackItemType> Stack; +    std::vector<uint64_t> AugmentingOrder; + +    // Phase 0: Initialize Node attributes and Time for DFS run +    for (auto &Node : Nodes) { +      Node.Discovery = 0; +      Node.Finish = 0; +      Node.NumCalls = 0; +      Node.Taken = false; +    } +    uint64_t Time = 0; +    // Mark Target as Taken +    // Taken attribute will be propagated backwards from Target towards Source +    Nodes[Target].Taken = true; + +    // Phase 1: Start DFS traversal from Source +    Stack.emplace(Source, 0); +    Nodes[Source].Discovery = ++Time; +    while (!Stack.empty()) { +      auto NodeIdx = Stack.top().first; +      auto EdgeIdx = Stack.top().second; + +      // If we haven't scanned all edges out of NodeIdx, continue scanning +      if (EdgeIdx < Edges[NodeIdx].size()) { +        auto &Edge = Edges[NodeIdx][EdgeIdx]; +        auto &Dst = Nodes[Edge.Dst]; +        Stack.top().second++; + +        if (Edge.OnShortestPath) { +          // If we haven't seen Edge.Dst so far, continue DFS search there +          if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) { +            Dst.Discovery = ++Time; +            Stack.emplace(Edge.Dst, 0); +            Dst.NumCalls++; +          } else if (Dst.Taken && Dst.Finish != 0) { +            // Else, if Edge.Dst already have a path to Target, so that NodeIdx +            Nodes[NodeIdx].Taken = true; +          } +        } +      } else { +        // If we are done scanning all edge out of NodeIdx +        Stack.pop(); +        // If we haven't found a path from NodeIdx to Target, forget about it +        if (!Nodes[NodeIdx].Taken) { +          Nodes[NodeIdx].Discovery = 0; +        } else { +          // If we have found a path from NodeIdx to Target, then finish NodeIdx +          // and propagate Taken flag to DFS parent unless at the Source +          Nodes[NodeIdx].Finish = ++Time; +          // NodeIdx == Source if and only if the stack is empty +          if (NodeIdx != Source) { +            assert(!Stack.empty() && "empty stack while running dfs"); +            Nodes[Stack.top().first].Taken = true; +          } +          AugmentingOrder.push_back(NodeIdx); +        } +      } +    } +    // Nodes are collected decreasing Finish time, so the order is reversed +    std::reverse(AugmentingOrder.begin(), AugmentingOrder.end()); + +    // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges +    for (size_t Src : AugmentingOrder) { +      AugmentingEdges[Src].clear(); +      for (auto &Edge : Edges[Src]) { +        uint64_t Dst = Edge.Dst; +        if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken && +            Nodes[Dst].Finish < Nodes[Src].Finish) { +          AugmentingEdges[Src].push_back(&Edge); +        } +      } +      assert((Src == Target || !AugmentingEdges[Src].empty()) && +             "incorrectly constructed augmenting edges"); +    } + +    return AugmentingOrder; +  } + +  /// Update the current flow along the given (acyclic) subgraph specified by +  /// the vertex order, AugmentingOrder. The objective is to send as much flow +  /// as possible while evenly distributing flow among successors of each node. +  /// After the update at least one edge is saturated. 
+  bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) { +    // Phase 0: Initialization +    for (uint64_t Src : AugmentingOrder) { +      Nodes[Src].FracFlow = 0; +      Nodes[Src].IntFlow = 0; +      for (auto &Edge : AugmentingEdges[Src]) { +        Edge->AugmentedFlow = 0; +      } +    } + +    // Phase 1: Send a unit of fractional flow along the DAG +    uint64_t MaxFlowAmount = INF; +    Nodes[Source].FracFlow = 1.0; +    for (uint64_t Src : AugmentingOrder) { +      assert((Src == Target || Nodes[Src].FracFlow > 0.0) && +             "incorrectly computed fractional flow"); +      // Distribute flow evenly among successors of Src +      uint64_t Degree = AugmentingEdges[Src].size(); +      for (auto &Edge : AugmentingEdges[Src]) { +        double EdgeFlow = Nodes[Src].FracFlow / Degree; +        Nodes[Edge->Dst].FracFlow += EdgeFlow; +        if (Edge->Capacity == INF) +          continue; +        uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow; +        MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow); +      } +    } +    // Stop early if we cannot send any (integral) flow from Source to Target +    if (MaxFlowAmount == 0) +      return false; + +    // Phase 2: Send an integral flow of MaxFlowAmount +    Nodes[Source].IntFlow = MaxFlowAmount; +    for (uint64_t Src : AugmentingOrder) { +      if (Src == Target) +        break; +      // Distribute flow evenly among successors of Src, rounding up to make +      // sure all flow is sent +      uint64_t Degree = AugmentingEdges[Src].size(); +      // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree +      uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree; +      for (auto &Edge : AugmentingEdges[Src]) { +        uint64_t Dst = Edge->Dst; +        uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow); +        EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow)); +        Nodes[Dst].IntFlow += EdgeFlow; +        Nodes[Src].IntFlow -= EdgeFlow; +        Edge->AugmentedFlow += EdgeFlow; +      } +    } +    assert(Nodes[Target].IntFlow <= MaxFlowAmount); +    Nodes[Target].IntFlow = 0; + +    // Phase 3: Send excess flow back traversing the nodes backwards. +    // Because of rounding, not all flow can be sent along the edges of Src. +    // Hence, sending the remaining flow back to maintain flow conservation +    for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) { +      uint64_t Src = AugmentingOrder[Idx - 1]; +      // Try to send excess flow back along each edge. +      // Make sure we only send back flow we just augmented (AugmentedFlow). 
+      for (auto &Edge : AugmentingEdges[Src]) { +        uint64_t Dst = Edge->Dst; +        if (Nodes[Dst].IntFlow == 0) +          continue; +        uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow); +        Nodes[Dst].IntFlow -= EdgeFlow; +        Nodes[Src].IntFlow += EdgeFlow; +        Edge->AugmentedFlow -= EdgeFlow; +      } +    } + +    // Phase 4: Update flow values along all edges +    bool HasSaturatedEdges = false; +    for (uint64_t Src : AugmentingOrder) { +      // Verify that we have sent all the excess flow from the node +      assert(Src == Source || Nodes[Src].IntFlow == 0); +      for (auto &Edge : AugmentingEdges[Src]) { +        assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow); +        // Update flow values along the edge and its reverse copy +        auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex]; +        Edge->Flow += Edge->AugmentedFlow; +        RevEdge.Flow -= Edge->AugmentedFlow; +        if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0) +          HasSaturatedEdges = true; +      } +    } + +    // The augmentation is successful iff at least one edge becomes saturated +    return HasSaturatedEdges; +  } + +  /// Identify candidate (shortest) edges for augmentation. +  void identifyShortestEdges(uint64_t PathCapacity) { +    assert(PathCapacity > 0 && "found an incorrect augmenting DAG"); +    // To make sure the augmentation DAG contains only edges with large residual +    // capacity, we prune all edges whose capacity is below a fraction of +    // the capacity of the augmented path. +    // (All edges of the path itself are always in the DAG) +    uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1)); + +    // Decide which edges are on a shortest path from Source to Target +    for (size_t Src = 0; Src < Nodes.size(); Src++) { +      // An edge cannot be augmenting if the endpoint has large distance +      if (Nodes[Src].Distance > Nodes[Target].Distance) +        continue; + +      for (auto &Edge : Edges[Src]) { +        uint64_t Dst = Edge.Dst; +        Edge.OnShortestPath = +            Src != Target && Dst != Source && +            Nodes[Dst].Distance <= Nodes[Target].Distance && +            Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost && +            Edge.Capacity > Edge.Flow && +            uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity; +      } +    } +  } + +  /// A node in a flow network. +  struct Node { +    /// The cost of the cheapest path from the source to the current node. +    int64_t Distance; +    /// The node preceding the current one in the path. +    uint64_t ParentNode; +    /// The index of the edge between ParentNode and the current node. +    uint64_t ParentEdgeIndex; +    /// An indicator of whether the current node is in a queue. +    bool Taken; + +    /// Data fields utilized in DAG-augmentation: +    /// Fractional flow. +    double FracFlow; +    /// Integral flow. +    uint64_t IntFlow; +    /// Discovery time. +    uint64_t Discovery; +    /// Finish time. +    uint64_t Finish; +    /// NumCalls. +    uint64_t NumCalls; +  }; + +  /// An edge in a flow network. +  struct Edge { +    /// The cost of the edge. +    int64_t Cost; +    /// The capacity of the edge. +    int64_t Capacity; +    /// The current flow on the edge. +    int64_t Flow; +    /// The destination node of the edge. +    uint64_t Dst; +    /// The index of the reverse edge between Dst and the current node. 
+    uint64_t RevEdgeIndex; + +    /// Data fields utilized in DAG-augmentation: +    /// Whether the edge is currently on a shortest path from Source to Target. +    bool OnShortestPath; +    /// Extra flow along the edge. +    uint64_t AugmentedFlow; +  }; + +  /// The set of network nodes. +  std::vector<Node> Nodes; +  /// The set of network edges. +  std::vector<std::vector<Edge>> Edges; +  /// Source node of the flow. +  uint64_t Source; +  /// Target (sink) node of the flow. +  uint64_t Target; +  /// Augmenting edges. +  std::vector<std::vector<Edge *>> AugmentingEdges; +}; + +constexpr int64_t MinCostMaxFlow::AuxCostUnlikely; +constexpr uint64_t MinCostMaxFlow::MinBaseDistance; + +/// A post-processing adjustment of control flow. It applies two steps by +/// rerouting some flow and making it more realistic: +/// +/// - First, it removes all isolated components ("islands") with a positive flow +///   that are unreachable from the entry block. For every such component, we +///   find the shortest from the entry to an exit passing through the component, +///   and increase the flow by one unit along the path. +/// +/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks +///   with no sampled counts. Then it rebalnces the flow that goes through such +///   a subgraph so that each branch is taken with probability 50%. +///   An unknown subgraph is such that for every two nodes u and v: +///     - u dominates v and u is not unknown; +///     - v post-dominates u; and +///     - all inner-nodes of all (u,v)-paths are unknown. +/// +class FlowAdjuster { +public: +  FlowAdjuster(FlowFunction &Func) : Func(Func) { +    assert(Func.Blocks[Func.Entry].isEntry() && +           "incorrect index of the entry block"); +  } + +  // Run the post-processing +  void run() { +    /// Adjust the flow to get rid of isolated components. +    joinIsolatedComponents(); + +    /// Rebalance the flow inside unknown subgraphs. +    rebalanceUnknownSubgraphs(); +  } + +private: +  void joinIsolatedComponents() { +    // Find blocks that are reachable from the source +    auto Visited = BitVector(NumBlocks(), false); +    findReachable(Func.Entry, Visited); + +    // Iterate over all non-reachable blocks and adjust their weights +    for (uint64_t I = 0; I < NumBlocks(); I++) { +      auto &Block = Func.Blocks[I]; +      if (Block.Flow > 0 && !Visited[I]) { +        // Find a path from the entry to an exit passing through the block I +        auto Path = findShortestPath(I); +        // Increase the flow along the path +        assert(Path.size() > 0 && Path[0]->Source == Func.Entry && +               "incorrectly computed path adjusting control flow"); +        Func.Blocks[Func.Entry].Flow += 1; +        for (auto &Jump : Path) { +          Jump->Flow += 1; +          Func.Blocks[Jump->Target].Flow += 1; +          // Update reachability +          findReachable(Jump->Target, Visited); +        } +      } +    } +  } + +  /// Run BFS from a given block along the jumps with a positive flow and mark +  /// all reachable blocks. 
+  void findReachable(uint64_t Src, BitVector &Visited) { +    if (Visited[Src]) +      return; +    std::queue<uint64_t> Queue; +    Queue.push(Src); +    Visited[Src] = true; +    while (!Queue.empty()) { +      Src = Queue.front(); +      Queue.pop(); +      for (auto Jump : Func.Blocks[Src].SuccJumps) { +        uint64_t Dst = Jump->Target; +        if (Jump->Flow > 0 && !Visited[Dst]) { +          Queue.push(Dst); +          Visited[Dst] = true; +        } +      } +    } +  } + +  /// Find the shortest path from the entry block to an exit block passing +  /// through a given block. +  std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { +    // A path from the entry block to BlockIdx +    auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); +    // A path from BlockIdx to an exit block +    auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); + +    // Concatenate the two paths +    std::vector<FlowJump *> Result; +    Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); +    Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); +    return Result; +  } + +  /// Apply the Dijkstra algorithm to find the shortest path from a given +  /// Source to a given Target block. +  /// If Target == -1, then the path ends at an exit block. +  std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { +    // Quit early, if possible +    if (Source == Target) +      return std::vector<FlowJump *>(); +    if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) +      return std::vector<FlowJump *>(); + +    // Initialize data structures +    auto Distance = std::vector<int64_t>(NumBlocks(), INF); +    auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); +    Distance[Source] = 0; +    std::set<std::pair<uint64_t, uint64_t>> Queue; +    Queue.insert(std::make_pair(Distance[Source], Source)); + +    // Run the Dijkstra algorithm +    while (!Queue.empty()) { +      uint64_t Src = Queue.begin()->second; +      Queue.erase(Queue.begin()); +      // If we found a solution, quit early +      if (Src == Target || +          (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) +        break; + +      for (auto Jump : Func.Blocks[Src].SuccJumps) { +        uint64_t Dst = Jump->Target; +        int64_t JumpDist = jumpDistance(Jump); +        if (Distance[Dst] > Distance[Src] + JumpDist) { +          Queue.erase(std::make_pair(Distance[Dst], Dst)); + +          Distance[Dst] = Distance[Src] + JumpDist; +          Parent[Dst] = Jump; + +          Queue.insert(std::make_pair(Distance[Dst], Dst)); +        } +      } +    } +    // If Target is not provided, find the closest exit block +    if (Target == AnyExitBlock) { +      for (uint64_t I = 0; I < NumBlocks(); I++) { +        if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { +          if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { +            Target = I; +          } +        } +      } +    } +    assert(Parent[Target] != nullptr && "a path does not exist"); + +    // Extract the constructed path +    std::vector<FlowJump *> Result; +    uint64_t Now = Target; +    while (Now != Source) { +      assert(Now == Parent[Now]->Target && "incorrect parent jump"); +      Result.push_back(Parent[Now]); +      Now = Parent[Now]->Source; +    } +    // Reverse the path, since it is extracted from Target to Source +    std::reverse(Result.begin(), Result.end()); +    return Result; +  } + +  /// A distance of a path for a given jump. 
+  /// In order to incite the path to use blocks/jumps with large positive flow, +  /// and avoid changing branch probability of outgoing edges drastically, +  /// set the jump distance so as: +  ///   - to minimize the number of unlikely jumps used and subject to that, +  ///   - to minimize the number of Flow == 0 jumps used and subject to that, +  ///   - minimizes total multiplicative Flow increase for the remaining edges. +  /// To capture this objective with integer distances, we round off fractional +  /// parts to a multiple of 1 / BaseDistance. +  int64_t jumpDistance(FlowJump *Jump) const { +    uint64_t BaseDistance = +        std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance), +                 std::min(Func.Blocks[Func.Entry].Flow, +                          MinCostMaxFlow::AuxCostUnlikely / NumBlocks())); +    if (Jump->IsUnlikely) +      return MinCostMaxFlow::AuxCostUnlikely; +    if (Jump->Flow > 0) +      return BaseDistance + BaseDistance / Jump->Flow; +    return BaseDistance * NumBlocks(); +  }; + +  uint64_t NumBlocks() const { return Func.Blocks.size(); } + +  /// Rebalance unknown subgraphs so that the flow is split evenly across the +  /// outgoing branches of every block of the subgraph. The method iterates over +  /// blocks with known weight and identifies unknown subgraphs rooted at the +  /// blocks. Then it verifies if flow rebalancing is feasible and applies it. +  void rebalanceUnknownSubgraphs() { +    // Try to find unknown subgraphs from each block +    for (uint64_t I = 0; I < Func.Blocks.size(); I++) { +      auto SrcBlock = &Func.Blocks[I]; +      // Verify if rebalancing rooted at SrcBlock is feasible +      if (!canRebalanceAtRoot(SrcBlock)) +        continue; + +      // Find an unknown subgraphs starting at SrcBlock. Along the way, +      // fill in known destinations and intermediate unknown blocks. +      std::vector<FlowBlock *> UnknownBlocks; +      std::vector<FlowBlock *> KnownDstBlocks; +      findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); + +      // Verify if rebalancing of the subgraph is feasible. If the search is +      // successful, find the unique destination block (which can be null) +      FlowBlock *DstBlock = nullptr; +      if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, +                                DstBlock)) +        continue; + +      // We cannot rebalance subgraphs containing cycles among unknown blocks +      if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) +        continue; + +      // Rebalance the flow +      rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); +    } +  } + +  /// Verify if rebalancing rooted at a given block is possible. +  bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { +    // Do not attempt to find unknown subgraphs from an unknown or a +    // zero-flow block +    if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) +      return false; + +    // Do not attempt to process subgraphs from a block w/o unknown sucessors +    bool HasUnknownSuccs = false; +    for (auto Jump : SrcBlock->SuccJumps) { +      if (Func.Blocks[Jump->Target].UnknownWeight) { +        HasUnknownSuccs = true; +        break; +      } +    } +    if (!HasUnknownSuccs) +      return false; + +    return true; +  } + +  /// Find an unknown subgraph starting at block SrcBlock. The method sets +  /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. 
+  void findUnknownSubgraph(const FlowBlock *SrcBlock, +                           std::vector<FlowBlock *> &KnownDstBlocks, +                           std::vector<FlowBlock *> &UnknownBlocks) { +    // Run BFS from SrcBlock and make sure all paths are going through unknown +    // blocks and end at a known DstBlock +    auto Visited = BitVector(NumBlocks(), false); +    std::queue<uint64_t> Queue; + +    Queue.push(SrcBlock->Index); +    Visited[SrcBlock->Index] = true; +    while (!Queue.empty()) { +      auto &Block = Func.Blocks[Queue.front()]; +      Queue.pop(); +      // Process blocks reachable from Block +      for (auto Jump : Block.SuccJumps) { +        // If Jump can be ignored, skip it +        if (ignoreJump(SrcBlock, nullptr, Jump)) +          continue; + +        uint64_t Dst = Jump->Target; +        // If Dst has been visited, skip Jump +        if (Visited[Dst]) +          continue; +        // Process block Dst +        Visited[Dst] = true; +        if (!Func.Blocks[Dst].UnknownWeight) { +          KnownDstBlocks.push_back(&Func.Blocks[Dst]); +        } else { +          Queue.push(Dst); +          UnknownBlocks.push_back(&Func.Blocks[Dst]); +        } +      } +    } +  } + +  /// Verify if rebalancing of the subgraph is feasible. If the checks are +  /// successful, set the unique destination block, DstBlock (can be null). +  bool canRebalanceSubgraph(const FlowBlock *SrcBlock, +                            const std::vector<FlowBlock *> &KnownDstBlocks, +                            const std::vector<FlowBlock *> &UnknownBlocks, +                            FlowBlock *&DstBlock) { +    // If the list of unknown blocks is empty, we don't need rebalancing +    if (UnknownBlocks.empty()) +      return false; + +    // If there are multiple known sinks, we can't rebalance +    if (KnownDstBlocks.size() > 1) +      return false; +    DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front(); + +    // Verify sinks of the subgraph +    for (auto Block : UnknownBlocks) { +      if (Block->SuccJumps.empty()) { +        // If there are multiple (known and unknown) sinks, we can't rebalance +        if (DstBlock != nullptr) +          return false; +        continue; +      } +      size_t NumIgnoredJumps = 0; +      for (auto Jump : Block->SuccJumps) { +        if (ignoreJump(SrcBlock, DstBlock, Jump)) +          NumIgnoredJumps++; +      } +      // If there is a non-sink block in UnknownBlocks with all jumps ignored, +      // then we can't rebalance +      if (NumIgnoredJumps == Block->SuccJumps.size()) +        return false; +    } + +    return true; +  } + +  /// Decide whether the Jump is ignored while processing an unknown subgraphs +  /// rooted at basic block SrcBlock with the destination block, DstBlock. 
+  bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, +                  const FlowJump *Jump) { +    // Ignore unlikely jumps with zero flow +    if (Jump->IsUnlikely && Jump->Flow == 0) +      return true; + +    auto JumpSource = &Func.Blocks[Jump->Source]; +    auto JumpTarget = &Func.Blocks[Jump->Target]; + +    // Do not ignore jumps coming into DstBlock +    if (DstBlock != nullptr && JumpTarget == DstBlock) +      return false; + +    // Ignore jumps out of SrcBlock to known blocks +    if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) +      return true; + +    // Ignore jumps to known blocks with zero flow +    if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) +      return true; + +    return false; +  } + +  /// Verify if the given unknown subgraph is acyclic, and if yes, reorder +  /// UnknownBlocks in the topological order (so that all jumps are "forward"). +  bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, +                         std::vector<FlowBlock *> &UnknownBlocks) { +    // Extract local in-degrees in the considered subgraph +    auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); +    auto fillInDegree = [&](const FlowBlock *Block) { +      for (auto Jump : Block->SuccJumps) { +        if (ignoreJump(SrcBlock, DstBlock, Jump)) +          continue; +        LocalInDegree[Jump->Target]++; +      } +    }; +    fillInDegree(SrcBlock); +    for (auto Block : UnknownBlocks) { +      fillInDegree(Block); +    } +    // A loop containing SrcBlock +    if (LocalInDegree[SrcBlock->Index] > 0) +      return false; + +    std::vector<FlowBlock *> AcyclicOrder; +    std::queue<uint64_t> Queue; +    Queue.push(SrcBlock->Index); +    while (!Queue.empty()) { +      FlowBlock *Block = &Func.Blocks[Queue.front()]; +      Queue.pop(); +      // Stop propagation once we reach DstBlock, if any +      if (DstBlock != nullptr && Block == DstBlock) +        break; + +      // Keep an acyclic order of unknown blocks +      if (Block->UnknownWeight && Block != SrcBlock) +        AcyclicOrder.push_back(Block); + +      // Add to the queue all successors with zero local in-degree +      for (auto Jump : Block->SuccJumps) { +        if (ignoreJump(SrcBlock, DstBlock, Jump)) +          continue; +        uint64_t Dst = Jump->Target; +        LocalInDegree[Dst]--; +        if (LocalInDegree[Dst] == 0) { +          Queue.push(Dst); +        } +      } +    } + +    // If there is a cycle in the subgraph, AcyclicOrder contains only a subset +    // of all blocks +    if (UnknownBlocks.size() != AcyclicOrder.size()) +      return false; +    UnknownBlocks = AcyclicOrder; +    return true; +  } + +  /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and +  /// having UnknownBlocks intermediate blocks. 
+  void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, +                                const FlowBlock *DstBlock, +                                const std::vector<FlowBlock *> &UnknownBlocks) { +    assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); + +    // Ditribute flow from the source block +    uint64_t BlockFlow = 0; +    // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps +    for (auto Jump : SrcBlock->SuccJumps) { +      if (ignoreJump(SrcBlock, DstBlock, Jump)) +        continue; +      BlockFlow += Jump->Flow; +    } +    rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); + +    // Ditribute flow from the remaining blocks +    for (auto Block : UnknownBlocks) { +      assert(Block->UnknownWeight && "incorrect unknown subgraph"); +      uint64_t BlockFlow = 0; +      // Block's flow is the sum of incoming flows +      for (auto Jump : Block->PredJumps) { +        BlockFlow += Jump->Flow; +      } +      Block->Flow = BlockFlow; +      rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); +    } +  } + +  /// Redistribute flow for a block in a subgraph rooted at SrcBlock, +  /// and ending at DstBlock. +  void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, +                      const FlowBlock *Block, uint64_t BlockFlow) { +    // Process all successor jumps and update corresponding flow values +    size_t BlockDegree = 0; +    for (auto Jump : Block->SuccJumps) { +      if (ignoreJump(SrcBlock, DstBlock, Jump)) +        continue; +      BlockDegree++; +    } +    // If all successor jumps of the block are ignored, skip it +    if (DstBlock == nullptr && BlockDegree == 0) +      return; +    assert(BlockDegree > 0 && "all outgoing jumps are ignored"); + +    // Each of the Block's successors gets the following amount of flow. +    // Rounding the value up so that all flow is propagated +    uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; +    for (auto Jump : Block->SuccJumps) { +      if (ignoreJump(SrcBlock, DstBlock, Jump)) +        continue; +      uint64_t Flow = std::min(SuccFlow, BlockFlow); +      Jump->Flow = Flow; +      BlockFlow -= Flow; +    } +    assert(BlockFlow == 0 && "not all flow is propagated"); +  } + +  /// A constant indicating an arbitrary exit block of a function. +  static constexpr uint64_t AnyExitBlock = uint64_t(-1); + +  /// The function. +  FlowFunction &Func; +}; + +/// Initializing flow network for a given function. +/// +/// Every block is split into three nodes that are responsible for (i) an +/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or +/// reduction of the block weight. +void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { +  uint64_t NumBlocks = Func.Blocks.size(); +  assert(NumBlocks > 1 && "Too few blocks in a function"); +  LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); + +  // Pre-process data: make sure the entry weight is at least 1 +  if (Func.Blocks[Func.Entry].Weight == 0) { +    Func.Blocks[Func.Entry].Weight = 1; +  } +  // Introducing dummy source/sink pairs to allow flow circulation. +  // The nodes corresponding to blocks of Func have indicies in the range +  // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 
+  uint64_t S = 3 * NumBlocks; +  uint64_t T = S + 1; +  uint64_t S1 = S + 2; +  uint64_t T1 = S + 3; + +  Network.initialize(3 * NumBlocks + 4, S1, T1); + +  // Create three nodes for every block of the function +  for (uint64_t B = 0; B < NumBlocks; B++) { +    auto &Block = Func.Blocks[B]; +    assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && +           "non-zero weight of a block w/o weight except for an entry"); + +    // Split every block into two nodes +    uint64_t Bin = 3 * B; +    uint64_t Bout = 3 * B + 1; +    uint64_t Baux = 3 * B + 2; +    if (Block.Weight > 0) { +      Network.addEdge(S1, Bout, Block.Weight, 0); +      Network.addEdge(Bin, T1, Block.Weight, 0); +    } + +    // Edges from S and to T +    assert((!Block.isEntry() || !Block.isExit()) && +           "a block cannot be an entry and an exit"); +    if (Block.isEntry()) { +      Network.addEdge(S, Bin, 0); +    } else if (Block.isExit()) { +      Network.addEdge(Bout, T, 0); +    } + +    // An auxiliary node to allow increase/reduction of block counts: +    // We assume that decreasing block counts is more expensive than increasing, +    // and thus, setting separate costs here. In the future we may want to tune +    // the relative costs so as to maximize the quality of generated profiles. +    int64_t AuxCostInc = SampleProfileProfiCostInc; +    int64_t AuxCostDec = SampleProfileProfiCostDec; +    if (Block.UnknownWeight) { +      // Do not penalize changing weights of blocks w/o known profile count +      AuxCostInc = 0; +      AuxCostDec = 0; +    } else { +      // Increasing the count for "cold" blocks with zero initial count is more +      // expensive than for "hot" ones +      if (Block.Weight == 0) { +        AuxCostInc = SampleProfileProfiCostIncZero; +      } +      // Modifying the count of the entry block is expensive +      if (Block.isEntry()) { +        AuxCostInc = SampleProfileProfiCostIncEntry; +        AuxCostDec = SampleProfileProfiCostDecEntry; +      } +    } +    // For blocks with self-edges, do not penalize a reduction of the count, +    // as all of the increase can be attributed to the self-edge +    if (Block.HasSelfEdge) { +      AuxCostDec = 0; +    } + +    Network.addEdge(Bin, Baux, AuxCostInc); +    Network.addEdge(Baux, Bout, AuxCostInc); +    if (Block.Weight > 0) { +      Network.addEdge(Bout, Baux, AuxCostDec); +      Network.addEdge(Baux, Bin, AuxCostDec); +    } +  } + +  // Creating edges for every jump +  for (auto &Jump : Func.Jumps) { +    uint64_t Src = Jump.Source; +    uint64_t Dst = Jump.Target; +    if (Src != Dst) { +      uint64_t SrcOut = 3 * Src + 1; +      uint64_t DstIn = 3 * Dst; +      uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; +      Network.addEdge(SrcOut, DstIn, Cost); +    } +  } + +  // Make sure we have a valid flow circulation +  Network.addEdge(T, S, 0); +} + +/// Extract resulting block and edge counts from the flow network. 
+void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { +  uint64_t NumBlocks = Func.Blocks.size(); + +  // Extract resulting block counts +  for (uint64_t Src = 0; Src < NumBlocks; Src++) { +    auto &Block = Func.Blocks[Src]; +    uint64_t SrcOut = 3 * Src + 1; +    int64_t Flow = 0; +    for (auto &Adj : Network.getFlow(SrcOut)) { +      uint64_t DstIn = Adj.first; +      int64_t DstFlow = Adj.second; +      bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); +      if (!IsAuxNode || Block.HasSelfEdge) { +        Flow += DstFlow; +      } +    } +    Block.Flow = Flow; +    assert(Flow >= 0 && "negative block flow"); +  } + +  // Extract resulting jump counts +  for (auto &Jump : Func.Jumps) { +    uint64_t Src = Jump.Source; +    uint64_t Dst = Jump.Target; +    int64_t Flow = 0; +    if (Src != Dst) { +      uint64_t SrcOut = 3 * Src + 1; +      uint64_t DstIn = 3 * Dst; +      Flow = Network.getFlow(SrcOut, DstIn); +    } else { +      uint64_t SrcOut = 3 * Src + 1; +      uint64_t SrcAux = 3 * Src + 2; +      int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); +      if (AuxFlow > 0) +        Flow = AuxFlow; +    } +    Jump.Flow = Flow; +    assert(Flow >= 0 && "negative jump flow"); +  } +} + +#ifndef NDEBUG +/// Verify that the computed flow values satisfy flow conservation rules +void verifyWeights(const FlowFunction &Func) { +  const uint64_t NumBlocks = Func.Blocks.size(); +  auto InFlow = std::vector<uint64_t>(NumBlocks, 0); +  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); +  for (auto &Jump : Func.Jumps) { +    InFlow[Jump.Target] += Jump.Flow; +    OutFlow[Jump.Source] += Jump.Flow; +  } + +  uint64_t TotalInFlow = 0; +  uint64_t TotalOutFlow = 0; +  for (uint64_t I = 0; I < NumBlocks; I++) { +    auto &Block = Func.Blocks[I]; +    if (Block.isEntry()) { +      TotalInFlow += Block.Flow; +      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); +    } else if (Block.isExit()) { +      TotalOutFlow += Block.Flow; +      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); +    } else { +      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); +      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); +    } +  } +  assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); + +  // Verify that there are no isolated flow components +  // One could modify FlowFunction to hold edges indexed by the sources, which +  // will avoid a creation of the object +  auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); +  for (auto &Jump : Func.Jumps) { +    if (Jump.Flow > 0) { +      PositiveFlowEdges[Jump.Source].push_back(Jump.Target); +    } +  } + +  // Run BFS from the source along edges with positive flow +  std::queue<uint64_t> Queue; +  auto Visited = BitVector(NumBlocks, false); +  Queue.push(Func.Entry); +  Visited[Func.Entry] = true; +  while (!Queue.empty()) { +    uint64_t Src = Queue.front(); +    Queue.pop(); +    for (uint64_t Dst : PositiveFlowEdges[Src]) { +      if (!Visited[Dst]) { +        Queue.push(Dst); +        Visited[Dst] = true; +      } +    } +  } + +  // Verify that every block that has a positive flow is reached from the source +  // along edges with a positive flow +  for (uint64_t I = 0; I < NumBlocks; I++) { +    auto &Block = Func.Blocks[I]; +    assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); +  } +} +#endif + +} // end of anonymous namespace + +/// Apply the profile inference 
algorithm for a given flow function +void llvm::applyFlowInference(FlowFunction &Func) { +  // Create and apply an inference network model +  auto InferenceNetwork = MinCostMaxFlow(); +  initializeNetwork(InferenceNetwork, Func); +  InferenceNetwork.run(); + +  // Extract flow values for every block and every edge +  extractWeights(InferenceNetwork, Func); + +  // Post-processing adjustments to the flow +  auto Adjuster = FlowAdjuster(Func); +  Adjuster.run(); + +#ifndef NDEBUG +  // Verify the result +  verifyWeights(Func); +#endif +}
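
Below is a minimal, self-contained sketch (not part of this change) of how a client such as the sample profile loader might drive the new entry point. It uses only the FlowFunction, FlowBlock, and FlowJump fields that this file itself relies on (Blocks, Jumps, Entry, Index, Weight, UnknownWeight, HasSelfEdge, SuccJumps, PredJumps, IsUnlikely), as declared in llvm/Transforms/Utils/SampleProfileInference.h; the four-block diamond CFG and the entry weight of 100 are made-up inputs for illustration.

    #include "llvm/Transforms/Utils/SampleProfileInference.h"
    #include <cstdint>

    using namespace llvm;

    // Hypothetical driver: a diamond CFG 0 -> {1, 2} -> 3 where only the
    // entry block has a sampled count; profi infers the remaining block and
    // jump counts so that flow conservation holds.
    static void runProfiExample() {
      FlowFunction Func;
      Func.Blocks.resize(4);
      for (uint64_t I = 0; I < 4; I++) {
        Func.Blocks[I].Index = I;
        Func.Blocks[I].Weight = 0;
        Func.Blocks[I].UnknownWeight = true;
        Func.Blocks[I].HasSelfEdge = false;
      }
      Func.Entry = 0;
      Func.Blocks[0].Weight = 100;          // the only known count
      Func.Blocks[0].UnknownWeight = false;

      auto AddJump = [&Func](uint64_t Src, uint64_t Dst) {
        FlowJump Jump;
        Jump.Source = Src;
        Jump.Target = Dst;
        Jump.IsUnlikely = false;
        Func.Jumps.push_back(Jump);
      };
      AddJump(0, 1);
      AddJump(0, 2);
      AddJump(1, 3);
      AddJump(2, 3);
      // Wire the per-block successor/predecessor jump lists expected by the
      // inference and post-processing passes (after all jumps are stored, so
      // the pointers stay valid).
      for (auto &Jump : Func.Jumps) {
        Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
        Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
      }

      applyFlowInference(Func);

      // Func.Blocks[I].Flow and Func.Jumps[J].Flow now hold inferred counts:
      // 100 units enter at block 0 and are split across the two unknown
      // branches before merging at block 3.
    }

This is only an illustration of the API surface; in the tree, the FlowFunction is built from the IR by the sample profile loader rather than by hand.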
