author    | Dimitry Andric <dim@FreeBSD.org> | 2023-04-14 21:41:27 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-06-22 18:20:56 +0000
commit    | bdd1243df58e60e85101c09001d9812a789b6bc4 (patch)
tree      | a1ce621c7301dd47ba2ddc3b8eaa63b441389481 /contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
parent    | 781624ca2d054430052c828ba8d2c2eaf2d733e7 (diff)
parent    | e3b557809604d036af6e00c60f012c2025b59a5e (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp | 463
1 file changed, 277 insertions, 186 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 5e92b9852a9f..691ee00bd831 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -26,34 +26,42 @@ using namespace llvm;

 namespace {

-static cl::opt<bool> SampleProfileEvenCountDistribution(
-    "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
-    cl::desc("Try to evenly distribute counts when there are multiple equally "
+static cl::opt<bool> SampleProfileEvenFlowDistribution(
+    "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
+    cl::desc("Try to evenly distribute flow when there are multiple equally "
              "likely options."));

-static cl::opt<unsigned> SampleProfileMaxDfsCalls(
-    "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
-    cl::desc("Maximum number of dfs iterations for even count distribution."));
+static cl::opt<bool> SampleProfileRebalanceUnknown(
+    "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
+    cl::desc("Evenly re-distribute flow among unknown subgraphs."));

-static cl::opt<unsigned> SampleProfileProfiCostInc(
-    "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
-    cl::desc("A cost of increasing a block's count by one."));
+static cl::opt<bool> SampleProfileJoinIslands(
+    "sample-profile-join-islands", cl::init(true), cl::Hidden,
+    cl::desc("Join isolated components having positive flow."));

-static cl::opt<unsigned> SampleProfileProfiCostDec(
-    "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
-    cl::desc("A cost of decreasing a block's count by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
+    "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
+    cl::desc("The cost of increasing a block's count by one."));

-static cl::opt<unsigned> SampleProfileProfiCostIncZero(
-    "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
-    cl::desc("A cost of increasing a count of zero-weight block by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
+    "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
+    cl::desc("The cost of decreasing a block's count by one."));

-static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
-    "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
-    cl::desc("A cost of increasing the entry block's count by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
+    "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
+    cl::desc("The cost of increasing the entry block's count by one."));

-static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
-    "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
-    cl::desc("A cost of decreasing the entry block's count by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
+    "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
+    cl::desc("The cost of decreasing the entry block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
+    "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
+    cl::desc("The cost of increasing a count of zero-weight block by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
+    "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
+    cl::desc("The cost of increasing an unknown block's count by one."));

 /// A value indicating an infinite flow/capacity/weight of a block/edge.
 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
@@ -76,6 +84,8 @@ static constexpr int64_t INF = ((int64_t)1) << 50;
 /// minimum total cost respecting the given edge capacities.
 class MinCostMaxFlow {
 public:
+  MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
+
   // Initialize algorithm's data structures for a network of a given size.
   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
     Source = SourceNode;
@@ -83,13 +93,15 @@ public:

     Nodes = std::vector<Node>(NodeCount);
     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
-    if (SampleProfileEvenCountDistribution)
+    if (Params.EvenFlowDistribution)
       AugmentingEdges =
           std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
   }

   // Run the algorithm.
   int64_t run() {
+    LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");
+
     // Iteratively find an augmentation path/dag in the network and send the
     // flow along its edges
     size_t AugmentationIters = applyFlowAugmentation();
@@ -148,7 +160,7 @@ public:
   /// Returns a list of pairs (target node, amount of flow to the target).
   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
     std::vector<std::pair<uint64_t, int64_t>> Flow;
-    for (auto &Edge : Edges[Src]) {
+    for (const auto &Edge : Edges[Src]) {
       if (Edge.Flow > 0)
         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
     }
@@ -158,7 +170,7 @@ public:
   /// Get the total flow between a pair of nodes.
   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
     int64_t Flow = 0;
-    for (auto &Edge : Edges[Src]) {
+    for (const auto &Edge : Edges[Src]) {
       if (Edge.Dst == Dst) {
         Flow += Edge.Flow;
       }
@@ -166,11 +178,6 @@ public:
     return Flow;
   }

-  /// A cost of taking an unlikely jump.
-  static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
-  /// Minimum BaseDistance for the jump distance values in island joining.
-  static constexpr uint64_t MinBaseDistance = 10000;
-
 private:
   /// Iteratively find an augmentation path/dag in the network and send the
   /// flow along its edges. The method returns the number of applied iterations.
@@ -180,7 +187,7 @@ private:
     uint64_t PathCapacity = computeAugmentingPathCapacity();
     while (PathCapacity > 0) {
       bool Progress = false;
-      if (SampleProfileEvenCountDistribution) {
+      if (Params.EvenFlowDistribution) {
         // Identify node/edge candidates for augmentation
         identifyShortestEdges(PathCapacity);

@@ -253,7 +260,7 @@ private:
         // from Source to Target; it follows from inequalities
         // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
        //                       >= Dist[Source, V]
-        if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
+        if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
          break;
        if (Nodes[Src].Distance > Nodes[Target].Distance)
          continue;
@@ -342,7 +349,7 @@ private:

      if (Edge.OnShortestPath) {
        // If we haven't seen Edge.Dst so far, continue DFS search there
-        if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
+        if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
          Dst.Discovery = ++Time;
          Stack.emplace(Edge.Dst, 0);
          Dst.NumCalls++;
@@ -512,6 +519,9 @@ private:
     }
   }

+  /// Maximum number of DFS iterations for DAG finding.
+  static constexpr uint64_t MaxDfsCalls = 10;
+
   /// A node in a flow network.
   struct Node {
     /// The cost of the cheapest path from the source to the current node.
@@ -566,12 +576,11 @@ private:
   uint64_t Target;
   /// Augmenting edges.
   std::vector<std::vector<Edge *>> AugmentingEdges;
+  /// Params for flow computation.
+  const ProfiParams &Params;
 };

-constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
-constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
-
-/// A post-processing adjustment of control flow. It applies two steps by
+/// A post-processing adjustment of the control flow. It applies two steps by
 /// rerouting some flow and making it more realistic:
 ///
 /// - First, it removes all isolated components ("islands") with a positive flow
@@ -589,18 +598,20 @@ constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
 ///
 class FlowAdjuster {
 public:
-  FlowAdjuster(FlowFunction &Func) : Func(Func) {
-    assert(Func.Blocks[Func.Entry].isEntry() &&
-           "incorrect index of the entry block");
-  }
+  FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
+      : Params(Params), Func(Func) {}

-  // Run the post-processing
+  /// Apply the post-processing.
   void run() {
-    /// Adjust the flow to get rid of isolated components.
-    joinIsolatedComponents();
+    if (Params.JoinIslands) {
+      // Adjust the flow to get rid of isolated components
+      joinIsolatedComponents();
+    }

-    /// Rebalance the flow inside unknown subgraphs.
-    rebalanceUnknownSubgraphs();
+    if (Params.RebalanceUnknown) {
+      // Rebalance the flow inside unknown subgraphs
+      rebalanceUnknownSubgraphs();
+    }
   }

 private:
@@ -640,7 +651,7 @@ private:
     while (!Queue.empty()) {
       Src = Queue.front();
       Queue.pop();
-      for (auto Jump : Func.Blocks[Src].SuccJumps) {
+      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
         uint64_t Dst = Jump->Target;
         if (Jump->Flow > 0 && !Visited[Dst]) {
           Queue.push(Dst);
@@ -691,7 +702,7 @@ private:
           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
         break;

-      for (auto Jump : Func.Blocks[Src].SuccJumps) {
+      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        int64_t JumpDist = jumpDistance(Jump);
        if (Distance[Dst] > Distance[Src] + JumpDist) {
@@ -739,15 +750,15 @@ private:
   /// To capture this objective with integer distances, we round off fractional
   /// parts to a multiple of 1 / BaseDistance.
   int64_t jumpDistance(FlowJump *Jump) const {
+    if (Jump->IsUnlikely)
+      return Params.CostUnlikely;
     uint64_t BaseDistance =
-        std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance),
+        std::max(FlowAdjuster::MinBaseDistance,
                  std::min(Func.Blocks[Func.Entry].Flow,
-                          MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
-    if (Jump->IsUnlikely)
-      return MinCostMaxFlow::AuxCostUnlikely;
+                          Params.CostUnlikely / (2 * (NumBlocks() + 1))));
     if (Jump->Flow > 0)
       return BaseDistance + BaseDistance / Jump->Flow;
-    return BaseDistance * NumBlocks();
+    return 2 * BaseDistance * (NumBlocks() + 1);
   };

   uint64_t NumBlocks() const { return Func.Blocks.size(); }
@@ -758,31 +769,30 @@ private:
   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
   void rebalanceUnknownSubgraphs() {
     // Try to find unknown subgraphs from each block
-    for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
-      auto SrcBlock = &Func.Blocks[I];
+    for (const FlowBlock &SrcBlock : Func.Blocks) {
       // Verify if rebalancing rooted at SrcBlock is feasible
-      if (!canRebalanceAtRoot(SrcBlock))
+      if (!canRebalanceAtRoot(&SrcBlock))
         continue;

       // Find an unknown subgraphs starting at SrcBlock. Along the way,
       // fill in known destinations and intermediate unknown blocks.
       std::vector<FlowBlock *> UnknownBlocks;
       std::vector<FlowBlock *> KnownDstBlocks;
-      findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
+      findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);

       // Verify if rebalancing of the subgraph is feasible. If the search is
       // successful, find the unique destination block (which can be null)
       FlowBlock *DstBlock = nullptr;
-      if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
+      if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
                                 DstBlock))
         continue;

       // We cannot rebalance subgraphs containing cycles among unknown blocks
-      if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
+      if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
         continue;

       // Rebalance the flow
-      rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
+      rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
     }
   }

@@ -790,13 +800,13 @@ private:
   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
     // Do not attempt to find unknown subgraphs from an unknown or a
     // zero-flow block
-    if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
+    if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
       return false;

     // Do not attempt to process subgraphs from a block w/o unknown sucessors
     bool HasUnknownSuccs = false;
-    for (auto Jump : SrcBlock->SuccJumps) {
-      if (Func.Blocks[Jump->Target].UnknownWeight) {
+    for (auto *Jump : SrcBlock->SuccJumps) {
+      if (Func.Blocks[Jump->Target].HasUnknownWeight) {
         HasUnknownSuccs = true;
         break;
       }
@@ -823,7 +833,7 @@ private:
       auto &Block = Func.Blocks[Queue.front()];
       Queue.pop();
       // Process blocks reachable from Block
-      for (auto Jump : Block.SuccJumps) {
+      for (auto *Jump : Block.SuccJumps) {
        // If Jump can be ignored, skip it
        if (ignoreJump(SrcBlock, nullptr, Jump))
          continue;
@@ -834,7 +844,7 @@ private:
          continue;
        // Process block Dst
        Visited[Dst] = true;
-        if (!Func.Blocks[Dst].UnknownWeight) {
+        if (!Func.Blocks[Dst].HasUnknownWeight) {
          KnownDstBlocks.push_back(&Func.Blocks[Dst]);
        } else {
          Queue.push(Dst);
@@ -860,7 +870,7 @@ private:
     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();

     // Verify sinks of the subgraph
-    for (auto Block : UnknownBlocks) {
+    for (auto *Block : UnknownBlocks) {
       if (Block->SuccJumps.empty()) {
         // If there are multiple (known and unknown) sinks, we can't rebalance
         if (DstBlock != nullptr)
@@ -868,7 +878,7 @@ private:
         continue;
       }
       size_t NumIgnoredJumps = 0;
-      for (auto Jump : Block->SuccJumps) {
+      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          NumIgnoredJumps++;
       }
@@ -897,11 +907,11 @@ private:
       return false;

     // Ignore jumps out of SrcBlock to known blocks
-    if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
+    if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
       return true;

     // Ignore jumps to known blocks with zero flow
-    if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
+    if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
       return true;

     return false;
@@ -914,14 +924,14 @@ private:
     // Extract local in-degrees in the considered subgraph
     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
     auto fillInDegree = [&](const FlowBlock *Block) {
-      for (auto Jump : Block->SuccJumps) {
+      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        LocalInDegree[Jump->Target]++;
       }
     };
     fillInDegree(SrcBlock);
-    for (auto Block : UnknownBlocks) {
+    for (auto *Block : UnknownBlocks) {
       fillInDegree(Block);
     }
     // A loop containing SrcBlock
@@ -939,11 +949,11 @@ private:
         break;

       // Keep an acyclic order of unknown blocks
-      if (Block->UnknownWeight && Block != SrcBlock)
+      if (Block->HasUnknownWeight && Block != SrcBlock)
         AcyclicOrder.push_back(Block);

       // Add to the queue all successors with zero local in-degree
-      for (auto Jump : Block->SuccJumps) {
+      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        uint64_t Dst = Jump->Target;
@@ -972,7 +982,7 @@ private:
     // Ditribute flow from the source block
     uint64_t BlockFlow = 0;
     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
-    for (auto Jump : SrcBlock->SuccJumps) {
+    for (auto *Jump : SrcBlock->SuccJumps) {
       if (ignoreJump(SrcBlock, DstBlock, Jump))
         continue;
       BlockFlow += Jump->Flow;
@@ -980,11 +990,11 @@ private:
     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

     // Ditribute flow from the remaining blocks
-    for (auto Block : UnknownBlocks) {
-      assert(Block->UnknownWeight && "incorrect unknown subgraph");
+    for (auto *Block : UnknownBlocks) {
+      assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
       uint64_t BlockFlow = 0;
       // Block's flow is the sum of incoming flows
-      for (auto Jump : Block->PredJumps) {
+      for (auto *Jump : Block->PredJumps) {
         BlockFlow += Jump->Flow;
       }
       Block->Flow = BlockFlow;
@@ -998,7 +1008,7 @@ private:
                       const FlowBlock *Block, uint64_t BlockFlow) {
     // Process all successor jumps and update corresponding flow values
     size_t BlockDegree = 0;
-    for (auto Jump : Block->SuccJumps) {
+    for (auto *Jump : Block->SuccJumps) {
       if (ignoreJump(SrcBlock, DstBlock, Jump))
         continue;
       BlockDegree++;
@@ -1011,7 +1021,7 @@ private:
     // Each of the Block's successors gets the following amount of flow.
     // Rounding the value up so that all flow is propagated
     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
-    for (auto Jump : Block->SuccJumps) {
+    for (auto *Jump : Block->SuccJumps) {
       if (ignoreJump(SrcBlock, DstBlock, Jump))
         continue;
       uint64_t Flow = std::min(SuccFlow, BlockFlow);
@@ -1023,104 +1033,88 @@ private:
   /// A constant indicating an arbitrary exit block of a function.
   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
+  /// Minimum BaseDistance for the jump distance values in island joining.
+  static constexpr uint64_t MinBaseDistance = 10000;

+  /// Params for flow computation.
+  const ProfiParams &Params;
   /// The function.
   FlowFunction &Func;
 };

+std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
+                                             const FlowBlock &Block);
+std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
+                                            const FlowJump &Jump);
+
 /// Initializing flow network for a given function.
 ///
-/// Every block is split into three nodes that are responsible for (i) an
-/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
+/// Every block is split into two nodes that are responsible for (i) an
+/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
 /// reduction of the block weight.
-void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
+void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
+                       FlowFunction &Func) {
   uint64_t NumBlocks = Func.Blocks.size();
   assert(NumBlocks > 1 && "Too few blocks in a function");
-  LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
+  uint64_t NumJumps = Func.Jumps.size();
+  assert(NumJumps > 0 && "Too few jumps in a function");

-  // Pre-process data: make sure the entry weight is at least 1
-  if (Func.Blocks[Func.Entry].Weight == 0) {
-    Func.Blocks[Func.Entry].Weight = 1;
-  }
   // Introducing dummy source/sink pairs to allow flow circulation.
-  // The nodes corresponding to blocks of Func have indicies in the range
-  // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
-  uint64_t S = 3 * NumBlocks;
+  // The nodes corresponding to blocks of the function have indicies in
+  // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
+  // next four values.
+  uint64_t S = 2 * NumBlocks;
   uint64_t T = S + 1;
   uint64_t S1 = S + 2;
   uint64_t T1 = S + 3;
-  Network.initialize(3 * NumBlocks + 4, S1, T1);
+  Network.initialize(2 * NumBlocks + 4, S1, T1);

-  // Create three nodes for every block of the function
+  // Initialize nodes of the flow network
   for (uint64_t B = 0; B < NumBlocks; B++) {
     auto &Block = Func.Blocks[B];
-    assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
-           "non-zero weight of a block w/o weight except for an entry");
-    // Split every block into two nodes
-    uint64_t Bin = 3 * B;
-    uint64_t Bout = 3 * B + 1;
-    uint64_t Baux = 3 * B + 2;
-    if (Block.Weight > 0) {
-      Network.addEdge(S1, Bout, Block.Weight, 0);
-      Network.addEdge(Bin, T1, Block.Weight, 0);
-    }
+    // Split every block into two auxiliary nodes to allow
+    // increase/reduction of the block count.
+    uint64_t Bin = 2 * B;
+    uint64_t Bout = 2 * B + 1;

     // Edges from S and to T
-    assert((!Block.isEntry() || !Block.isExit()) &&
-           "a block cannot be an entry and an exit");
     if (Block.isEntry()) {
       Network.addEdge(S, Bin, 0);
     } else if (Block.isExit()) {
       Network.addEdge(Bout, T, 0);
     }

-    // An auxiliary node to allow increase/reduction of block counts:
-    // We assume that decreasing block counts is more expensive than increasing,
-    // and thus, setting separate costs here. In the future we may want to tune
-    // the relative costs so as to maximize the quality of generated profiles.
-    int64_t AuxCostInc = SampleProfileProfiCostInc;
-    int64_t AuxCostDec = SampleProfileProfiCostDec;
-    if (Block.UnknownWeight) {
-      // Do not penalize changing weights of blocks w/o known profile count
-      AuxCostInc = 0;
-      AuxCostDec = 0;
-    } else {
-      // Increasing the count for "cold" blocks with zero initial count is more
-      // expensive than for "hot" ones
-      if (Block.Weight == 0) {
-        AuxCostInc = SampleProfileProfiCostIncZero;
-      }
-      // Modifying the count of the entry block is expensive
-      if (Block.isEntry()) {
-        AuxCostInc = SampleProfileProfiCostIncEntry;
-        AuxCostDec = SampleProfileProfiCostDecEntry;
-      }
-    }
-    // For blocks with self-edges, do not penalize a reduction of the count,
-    // as all of the increase can be attributed to the self-edge
-    if (Block.HasSelfEdge) {
-      AuxCostDec = 0;
-    }
+    // Assign costs for increasing/decreasing the block counts
+    auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);

-    Network.addEdge(Bin, Baux, AuxCostInc);
-    Network.addEdge(Baux, Bout, AuxCostInc);
+    // Add the corresponding edges to the network
+    Network.addEdge(Bin, Bout, AuxCostInc);
     if (Block.Weight > 0) {
-      Network.addEdge(Bout, Baux, AuxCostDec);
-      Network.addEdge(Baux, Bin, AuxCostDec);
+      Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
+      Network.addEdge(S1, Bout, Block.Weight, 0);
+      Network.addEdge(Bin, T1, Block.Weight, 0);
     }
   }

-  // Creating edges for every jump
-  for (auto &Jump : Func.Jumps) {
-    uint64_t Src = Jump.Source;
-    uint64_t Dst = Jump.Target;
-    if (Src != Dst) {
-      uint64_t SrcOut = 3 * Src + 1;
-      uint64_t DstIn = 3 * Dst;
-      uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
-      Network.addEdge(SrcOut, DstIn, Cost);
+  // Initialize edges of the flow network
+  for (uint64_t J = 0; J < NumJumps; J++) {
+    auto &Jump = Func.Jumps[J];
+
+    // Get the endpoints corresponding to the jump
+    uint64_t Jin = 2 * Jump.Source + 1;
+    uint64_t Jout = 2 * Jump.Target;
+
+    // Assign costs for increasing/decreasing the jump counts
+    auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
+
+    // Add the corresponding edges to the network
+    Network.addEdge(Jin, Jout, AuxCostInc);
+    if (Jump.Weight > 0) {
+      Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
+      Network.addEdge(S1, Jout, Jump.Weight, 0);
+      Network.addEdge(Jin, T1, Jump.Weight, 0);
     }
   }
@@ -1128,55 +1122,130 @@ void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
   Network.addEdge(T, S, 0);
 }

-/// Extract resulting block and edge counts from the flow network.
-void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
-  uint64_t NumBlocks = Func.Blocks.size();
-
-  // Extract resulting block counts
-  for (uint64_t Src = 0; Src < NumBlocks; Src++) {
-    auto &Block = Func.Blocks[Src];
-    uint64_t SrcOut = 3 * Src + 1;
-    int64_t Flow = 0;
-    for (auto &Adj : Network.getFlow(SrcOut)) {
-      uint64_t DstIn = Adj.first;
-      int64_t DstFlow = Adj.second;
-      bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
-      if (!IsAuxNode || Block.HasSelfEdge) {
-        Flow += DstFlow;
-      }
+/// Assign costs for increasing/decreasing the block counts.
+std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
+                                             const FlowBlock &Block) {
+  // Modifying the weight of an unlikely block is expensive
+  if (Block.IsUnlikely)
+    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
+
+  // Assign default values for the costs
+  int64_t CostInc = Params.CostBlockInc;
+  int64_t CostDec = Params.CostBlockDec;
+  // Update the costs depending on the block metadata
+  if (Block.HasUnknownWeight) {
+    CostInc = Params.CostBlockUnknownInc;
+    CostDec = 0;
+  } else {
+    // Increasing the count for "cold" blocks with zero initial count is more
+    // expensive than for "hot" ones
+    if (Block.Weight == 0)
+      CostInc = Params.CostBlockZeroInc;
+    // Modifying the count of the entry block is expensive
+    if (Block.isEntry()) {
+      CostInc = Params.CostBlockEntryInc;
+      CostDec = Params.CostBlockEntryDec;
     }
-    Block.Flow = Flow;
-    assert(Flow >= 0 && "negative block flow");
   }
+  return std::make_pair(CostInc, CostDec);
+}
+
+/// Assign costs for increasing/decreasing the jump counts.
+std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
+                                            const FlowJump &Jump) {
+  // Modifying the weight of an unlikely jump is expensive
+  if (Jump.IsUnlikely)
+    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
+
+  // Assign default values for the costs
+  int64_t CostInc = Params.CostJumpInc;
+  int64_t CostDec = Params.CostJumpDec;
+  // Update the costs depending on the block metadata
+  if (Jump.Source + 1 == Jump.Target) {
+    // Adjusting the fall-through branch
+    CostInc = Params.CostJumpFTInc;
+    CostDec = Params.CostJumpFTDec;
+  }
+  if (Jump.HasUnknownWeight) {
+    // The cost is different for fall-through and non-fall-through branches
+    if (Jump.Source + 1 == Jump.Target)
+      CostInc = Params.CostJumpUnknownFTInc;
+    else
+      CostInc = Params.CostJumpUnknownInc;
+    CostDec = 0;
+  } else {
+    assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
+  }
+  return std::make_pair(CostInc, CostDec);
+}
+
+/// Extract resulting block and edge counts from the flow network.
+void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
+                    FlowFunction &Func) {
+  uint64_t NumBlocks = Func.Blocks.size();
+  uint64_t NumJumps = Func.Jumps.size();

   // Extract resulting jump counts
-  for (auto &Jump : Func.Jumps) {
-    uint64_t Src = Jump.Source;
-    uint64_t Dst = Jump.Target;
+  for (uint64_t J = 0; J < NumJumps; J++) {
+    auto &Jump = Func.Jumps[J];
+    uint64_t SrcOut = 2 * Jump.Source + 1;
+    uint64_t DstIn = 2 * Jump.Target;
+
     int64_t Flow = 0;
-    if (Src != Dst) {
-      uint64_t SrcOut = 3 * Src + 1;
-      uint64_t DstIn = 3 * Dst;
-      Flow = Network.getFlow(SrcOut, DstIn);
-    } else {
-      uint64_t SrcOut = 3 * Src + 1;
-      uint64_t SrcAux = 3 * Src + 2;
-      int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
-      if (AuxFlow > 0)
-        Flow = AuxFlow;
-    }
+    int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
+    if (Jump.Source != Jump.Target)
+      Flow = int64_t(Jump.Weight) + AuxFlow;
+    else
+      Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
+
     Jump.Flow = Flow;
     assert(Flow >= 0 && "negative jump flow");
   }
+
+  // Extract resulting block counts
+  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
+  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
+  for (auto &Jump : Func.Jumps) {
+    InFlow[Jump.Target] += Jump.Flow;
+    OutFlow[Jump.Source] += Jump.Flow;
+  }
+  for (uint64_t B = 0; B < NumBlocks; B++) {
+    auto &Block = Func.Blocks[B];
+    Block.Flow = std::max(OutFlow[B], InFlow[B]);
+  }
 }

 #ifndef NDEBUG
-/// Verify that the computed flow values satisfy flow conservation rules
-void verifyWeights(const FlowFunction &Func) {
+/// Verify that the provided block/jump weights are as expected.
+void verifyInput(const FlowFunction &Func) {
+  // Verify the entry block
+  assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
+  for (size_t I = 1; I < Func.Blocks.size(); I++) {
+    assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
+  }
+  // Verify CFG jumps
+  for (auto &Block : Func.Blocks) {
+    assert((!Block.isEntry() || !Block.isExit()) &&
+           "a block cannot be an entry and an exit");
+  }
+  // Verify input block weights
+  for (auto &Block : Func.Blocks) {
+    assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
+           "non-zero weight of a block w/o weight except for an entry");
+  }
+  // Verify input jump weights
+  for (auto &Jump : Func.Jumps) {
+    assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
+           "non-zero weight of a jump w/o weight");
+  }
+}
+
+/// Verify that the computed flow values satisfy flow conservation rules.
+void verifyOutput(const FlowFunction &Func) {
   const uint64_t NumBlocks = Func.Blocks.size();
   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
-  for (auto &Jump : Func.Jumps) {
+  for (const auto &Jump : Func.Jumps) {
     InFlow[Jump.Target] += Jump.Flow;
     OutFlow[Jump.Source] += Jump.Flow;
   }
@@ -1202,7 +1271,7 @@
   // One could modify FlowFunction to hold edges indexed by the sources, which
   // will avoid a creation of the object
   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
-  for (auto &Jump : Func.Jumps) {
+  for (const auto &Jump : Func.Jumps) {
    if (Jump.Flow > 0) {
      PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
    }
@@ -1235,22 +1304,44 @@

 } // end of anonymous namespace

-/// Apply the profile inference algorithm for a given flow function
-void llvm::applyFlowInference(FlowFunction &Func) {
+/// Apply the profile inference algorithm for a given function
+void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
+#ifndef NDEBUG
+  // Verify the input data
+  verifyInput(Func);
+#endif
+
   // Create and apply an inference network model
-  auto InferenceNetwork = MinCostMaxFlow();
-  initializeNetwork(InferenceNetwork, Func);
+  auto InferenceNetwork = MinCostMaxFlow(Params);
+  initializeNetwork(Params, InferenceNetwork, Func);
   InferenceNetwork.run();

   // Extract flow values for every block and every edge
-  extractWeights(InferenceNetwork, Func);
+  extractWeights(Params, InferenceNetwork, Func);

   // Post-processing adjustments to the flow
-  auto Adjuster = FlowAdjuster(Func);
+  auto Adjuster = FlowAdjuster(Params, Func);
   Adjuster.run();

 #ifndef NDEBUG
   // Verify the result
-  verifyWeights(Func);
+  verifyOutput(Func);
 #endif
 }
+
+/// Apply the profile inference algorithm for a given flow function
+void llvm::applyFlowInference(FlowFunction &Func) {
+  ProfiParams Params;
+  // Set the params from the command-line flags.
+  Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
+  Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
+  Params.JoinIslands = SampleProfileJoinIslands;
+  Params.CostBlockInc = SampleProfileProfiCostBlockInc;
+  Params.CostBlockDec = SampleProfileProfiCostBlockDec;
+  Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
+  Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
+  Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
+  Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
+
+  applyFlowInference(Params, Func);
+}
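
Editor's note (not part of the commit): the change above replaces the individual sample-profile-profi-* cl::opt knobs with a ProfiParams structure and adds a parameterized entry point llvm::applyFlowInference(const ProfiParams &, FlowFunction &), keeping the flag-based applyFlowInference(FlowFunction &) as a thin wrapper. The sketch below is a hypothetical caller, not code from this diff: it assumes the FlowFunction/FlowBlock/FlowJump fields referenced above (Blocks, Jumps, Entry, Weight, HasUnknownWeight, IsUnlikely, SuccJumps/PredJumps, Flow) and the declarations in llvm/Transforms/Utils/SampleProfileInference.h; the two-block CFG, the main() driver, the chosen cost values, and the reliance on ProfiParams header defaults for the unset fields are illustrative assumptions.

// Hypothetical caller (illustration only): a two-block CFG, entry (0) -> exit (1).
#include "llvm/Transforms/Utils/SampleProfileInference.h"

#include <cstdio>

int main() {
  llvm::FlowFunction Func;
  Func.Blocks.resize(2);
  Func.Entry = 0;

  // The entry block carries a sampled count; the exit block has no samples.
  Func.Blocks[0].Weight = 100;
  Func.Blocks[0].HasUnknownWeight = false;
  Func.Blocks[0].IsUnlikely = false;
  Func.Blocks[1].Weight = 0;
  Func.Blocks[1].HasUnknownWeight = true;
  Func.Blocks[1].IsUnlikely = false;

  // A single fall-through jump 0 -> 1 with no recorded weight.
  llvm::FlowJump Jump;
  Jump.Source = 0;
  Jump.Target = 1;
  Jump.Weight = 0;
  Jump.HasUnknownWeight = true;
  Jump.IsUnlikely = false;
  Func.Jumps.push_back(Jump);

  // The post-processing (FlowAdjuster) walks SuccJumps/PredJumps, so the
  // adjacency lists are wired up explicitly by the caller.
  Func.Blocks[0].SuccJumps.push_back(&Func.Jumps[0]);
  Func.Blocks[1].PredJumps.push_back(&Func.Jumps[0]);

  // Knobs that were previously the sample-profile-profi-* command-line flags;
  // the remaining ProfiParams fields are left at their header defaults.
  llvm::ProfiParams Params;
  Params.EvenFlowDistribution = true;
  Params.RebalanceUnknown = true;
  Params.JoinIslands = true;
  Params.CostBlockInc = 10;
  Params.CostBlockDec = 20;

  llvm::applyFlowInference(Params, Func);

  // Both blocks and the jump should end up carrying the entry count (100).
  for (const auto &Block : Func.Blocks)
    std::printf("block flow: %llu\n", (unsigned long long)Block.Flow);
  std::printf("jump flow:  %llu\n", (unsigned long long)Func.Jumps[0].Flow);
  return 0;
}

In LLVM proper the FlowFunction is built by the sample profile loader from the CFG and the sampled counts; the sketch above only shows the shape of the new parameterized interface.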