path: root/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
author    Dimitry Andric <dim@FreeBSD.org>  2023-04-14 21:41:27 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2023-06-22 18:20:56 +0000
commit    bdd1243df58e60e85101c09001d9812a789b6bc4 (patch)
tree      a1ce621c7301dd47ba2ddc3b8eaa63b441389481 /contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
parent    781624ca2d054430052c828ba8d2c2eaf2d733e7 (diff)
parent    e3b557809604d036af6e00c60f012c2025b59a5e (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp  463
1 file changed, 277 insertions, 186 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 5e92b9852a9f..691ee00bd831 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -26,34 +26,42 @@ using namespace llvm;
namespace {
-static cl::opt<bool> SampleProfileEvenCountDistribution(
- "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
- cl::desc("Try to evenly distribute counts when there are multiple equally "
+static cl::opt<bool> SampleProfileEvenFlowDistribution(
+ "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
+ cl::desc("Try to evenly distribute flow when there are multiple equally "
"likely options."));
-static cl::opt<unsigned> SampleProfileMaxDfsCalls(
- "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
- cl::desc("Maximum number of dfs iterations for even count distribution."));
+static cl::opt<bool> SampleProfileRebalanceUnknown(
+ "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
+ cl::desc("Evenly re-distribute flow among unknown subgraphs."));
-static cl::opt<unsigned> SampleProfileProfiCostInc(
- "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
- cl::desc("A cost of increasing a block's count by one."));
+static cl::opt<bool> SampleProfileJoinIslands(
+ "sample-profile-join-islands", cl::init(true), cl::Hidden,
+ cl::desc("Join isolated components having positive flow."));
-static cl::opt<unsigned> SampleProfileProfiCostDec(
- "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
- cl::desc("A cost of decreasing a block's count by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
+ "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
+ cl::desc("The cost of increasing a block's count by one."));
-static cl::opt<unsigned> SampleProfileProfiCostIncZero(
- "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
- cl::desc("A cost of increasing a count of zero-weight block by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
+ "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
+ cl::desc("The cost of decreasing a block's count by one."));
-static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
- "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
- cl::desc("A cost of increasing the entry block's count by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
+ "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
+ cl::desc("The cost of increasing the entry block's count by one."));
-static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
- "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
- cl::desc("A cost of decreasing the entry block's count by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
+ "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
+ cl::desc("The cost of decreasing the entry block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
+ "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
+ cl::desc("The cost of increasing a count of zero-weight block by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
+ "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
+ cl::desc("The cost of increasing an unknown block's count by one."));
/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
@@ -76,6 +84,8 @@ static constexpr int64_t INF = ((int64_t)1) << 50;
/// minimum total cost respecting the given edge capacities.
class MinCostMaxFlow {
public:
+ MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
+
// Initialize algorithm's data structures for a network of a given size.
void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
Source = SourceNode;
@@ -83,13 +93,15 @@ public:
Nodes = std::vector<Node>(NodeCount);
Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
- if (SampleProfileEvenCountDistribution)
+ if (Params.EvenFlowDistribution)
AugmentingEdges =
std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
}
// Run the algorithm.
int64_t run() {
+ LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");
+
// Iteratively find an augmentation path/dag in the network and send the
// flow along its edges
size_t AugmentationIters = applyFlowAugmentation();
@@ -148,7 +160,7 @@ public:
/// Returns a list of pairs (target node, amount of flow to the target).
const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
std::vector<std::pair<uint64_t, int64_t>> Flow;
- for (auto &Edge : Edges[Src]) {
+ for (const auto &Edge : Edges[Src]) {
if (Edge.Flow > 0)
Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
}
@@ -158,7 +170,7 @@ public:
/// Get the total flow between a pair of nodes.
int64_t getFlow(uint64_t Src, uint64_t Dst) const {
int64_t Flow = 0;
- for (auto &Edge : Edges[Src]) {
+ for (const auto &Edge : Edges[Src]) {
if (Edge.Dst == Dst) {
Flow += Edge.Flow;
}
@@ -166,11 +178,6 @@ public:
return Flow;
}
- /// A cost of taking an unlikely jump.
- static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
- /// Minimum BaseDistance for the jump distance values in island joining.
- static constexpr uint64_t MinBaseDistance = 10000;
-
private:
/// Iteratively find an augmentation path/dag in the network and send the
/// flow along its edges. The method returns the number of applied iterations.
@@ -180,7 +187,7 @@ private:
uint64_t PathCapacity = computeAugmentingPathCapacity();
while (PathCapacity > 0) {
bool Progress = false;
- if (SampleProfileEvenCountDistribution) {
+ if (Params.EvenFlowDistribution) {
// Identify node/edge candidates for augmentation
identifyShortestEdges(PathCapacity);
@@ -253,7 +260,7 @@ private:
// from Source to Target; it follows from inequalities
// Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
// >= Dist[Source, V]
- if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
+ if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
break;
if (Nodes[Src].Distance > Nodes[Target].Distance)
continue;
@@ -342,7 +349,7 @@ private:
if (Edge.OnShortestPath) {
// If we haven't seen Edge.Dst so far, continue DFS search there
- if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
+ if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
Dst.Discovery = ++Time;
Stack.emplace(Edge.Dst, 0);
Dst.NumCalls++;
@@ -512,6 +519,9 @@ private:
}
}
+ /// Maximum number of DFS iterations for DAG finding.
+ static constexpr uint64_t MaxDfsCalls = 10;
+
/// A node in a flow network.
struct Node {
/// The cost of the cheapest path from the source to the current node.
@@ -566,12 +576,11 @@ private:
uint64_t Target;
/// Augmenting edges.
std::vector<std::vector<Edge *>> AugmentingEdges;
+ /// Params for flow computation.
+ const ProfiParams &Params;
};
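For orientation, the interface above can be exercised on a tiny standalone network. This is a hedged sketch based only on the signatures visible in this diff (initialize, the two addEdge overloads, run, getFlow); the Capacity and Cost parameter names are inferred from usage, and since the class lives in an anonymous namespace the fragment is illustrative rather than a compilable client.

ProfiParams Params;                                  // assumed to default-construct sensibly
MinCostMaxFlow Network(Params);
// Four nodes: 0 is the source, 1 the sink, 2 and 3 are intermediate.
Network.initialize(/*NodeCount=*/4, /*SourceNode=*/0, /*SinkNode=*/1);
Network.addEdge(0, 2, /*Capacity=*/10, /*Cost=*/0);  // source -> 2
Network.addEdge(2, 3, /*Capacity=*/10, /*Cost=*/5);  // 2 -> 3, unit cost 5
Network.addEdge(3, 1, /*Capacity=*/10, /*Cost=*/0);  // 3 -> sink
Network.run();                                       // push a min-cost max flow
int64_t Flow23 = Network.getFlow(2, 3);              // 10 units routed along 2 -> 3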
-constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
-constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
-
-/// A post-processing adjustment of control flow. It applies two steps by
+/// A post-processing adjustment of the control flow. It applies two steps by
/// rerouting some flow and making it more realistic:
///
/// - First, it removes all isolated components ("islands") with a positive flow
@@ -589,18 +598,20 @@ constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
///
class FlowAdjuster {
public:
- FlowAdjuster(FlowFunction &Func) : Func(Func) {
- assert(Func.Blocks[Func.Entry].isEntry() &&
- "incorrect index of the entry block");
- }
+ FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
+ : Params(Params), Func(Func) {}
- // Run the post-processing
+ /// Apply the post-processing.
void run() {
- /// Adjust the flow to get rid of isolated components.
- joinIsolatedComponents();
+ if (Params.JoinIslands) {
+ // Adjust the flow to get rid of isolated components
+ joinIsolatedComponents();
+ }
- /// Rebalance the flow inside unknown subgraphs.
- rebalanceUnknownSubgraphs();
+ if (Params.RebalanceUnknown) {
+ // Rebalance the flow inside unknown subgraphs
+ rebalanceUnknownSubgraphs();
+ }
}
private:
@@ -640,7 +651,7 @@ private:
while (!Queue.empty()) {
Src = Queue.front();
Queue.pop();
- for (auto Jump : Func.Blocks[Src].SuccJumps) {
+ for (auto *Jump : Func.Blocks[Src].SuccJumps) {
uint64_t Dst = Jump->Target;
if (Jump->Flow > 0 && !Visited[Dst]) {
Queue.push(Dst);
@@ -691,7 +702,7 @@ private:
(Func.Blocks[Src].isExit() && Target == AnyExitBlock))
break;
- for (auto Jump : Func.Blocks[Src].SuccJumps) {
+ for (auto *Jump : Func.Blocks[Src].SuccJumps) {
uint64_t Dst = Jump->Target;
int64_t JumpDist = jumpDistance(Jump);
if (Distance[Dst] > Distance[Src] + JumpDist) {
@@ -739,15 +750,15 @@ private:
/// To capture this objective with integer distances, we round off fractional
/// parts to a multiple of 1 / BaseDistance.
int64_t jumpDistance(FlowJump *Jump) const {
+ if (Jump->IsUnlikely)
+ return Params.CostUnlikely;
uint64_t BaseDistance =
- std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance),
+ std::max(FlowAdjuster::MinBaseDistance,
std::min(Func.Blocks[Func.Entry].Flow,
- MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
- if (Jump->IsUnlikely)
- return MinCostMaxFlow::AuxCostUnlikely;
+ Params.CostUnlikely / (2 * (NumBlocks() + 1))));
if (Jump->Flow > 0)
return BaseDistance + BaseDistance / Jump->Flow;
- return BaseDistance * NumBlocks();
+ return 2 * BaseDistance * (NumBlocks() + 1);
};
uint64_t NumBlocks() const { return Func.Blocks.size(); }
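A worked example of jumpDistance with hypothetical inputs (9 blocks, an entry-block flow of 1000, and Params.CostUnlikely equal to 1 << 30; the real default of CostUnlikely is defined outside this diff):

// BaseDistance = max(MinBaseDistance, min(EntryFlow, CostUnlikely / (2 * (NumBlocks + 1))))
//              = max(10000, min(1000, (1 << 30) / 20)) = 10000
// jump with Flow == 4:  10000 + 10000 / 4 = 12500       (hot jumps are "short")
// jump with Flow == 0:  2 * 10000 * (9 + 1) = 200000    (cold jumps are "long")
// unlikely jump:        CostUnlikely = 1 << 30           (taken only as a last resort)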
@@ -758,31 +769,30 @@ private:
/// blocks. Then it verifies if flow rebalancing is feasible and applies it.
void rebalanceUnknownSubgraphs() {
// Try to find unknown subgraphs from each block
- for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
- auto SrcBlock = &Func.Blocks[I];
+ for (const FlowBlock &SrcBlock : Func.Blocks) {
// Verify if rebalancing rooted at SrcBlock is feasible
- if (!canRebalanceAtRoot(SrcBlock))
+ if (!canRebalanceAtRoot(&SrcBlock))
continue;
// Find an unknown subgraph starting at SrcBlock. Along the way,
// fill in known destinations and intermediate unknown blocks.
std::vector<FlowBlock *> UnknownBlocks;
std::vector<FlowBlock *> KnownDstBlocks;
- findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
+ findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
// Verify if rebalancing of the subgraph is feasible. If the search is
// successful, find the unique destination block (which can be null)
FlowBlock *DstBlock = nullptr;
- if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
+ if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
DstBlock))
continue;
// We cannot rebalance subgraphs containing cycles among unknown blocks
- if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
+ if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
continue;
// Rebalance the flow
- rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
+ rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
}
}
@@ -790,13 +800,13 @@ private:
bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
// Do not attempt to find unknown subgraphs from an unknown or a
// zero-flow block
- if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
+ if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
return false;
// Do not attempt to process subgraphs from a block w/o unknown successors
bool HasUnknownSuccs = false;
- for (auto Jump : SrcBlock->SuccJumps) {
- if (Func.Blocks[Jump->Target].UnknownWeight) {
+ for (auto *Jump : SrcBlock->SuccJumps) {
+ if (Func.Blocks[Jump->Target].HasUnknownWeight) {
HasUnknownSuccs = true;
break;
}
@@ -823,7 +833,7 @@ private:
auto &Block = Func.Blocks[Queue.front()];
Queue.pop();
// Process blocks reachable from Block
- for (auto Jump : Block.SuccJumps) {
+ for (auto *Jump : Block.SuccJumps) {
// If Jump can be ignored, skip it
if (ignoreJump(SrcBlock, nullptr, Jump))
continue;
@@ -834,7 +844,7 @@ private:
continue;
// Process block Dst
Visited[Dst] = true;
- if (!Func.Blocks[Dst].UnknownWeight) {
+ if (!Func.Blocks[Dst].HasUnknownWeight) {
KnownDstBlocks.push_back(&Func.Blocks[Dst]);
} else {
Queue.push(Dst);
@@ -860,7 +870,7 @@ private:
DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
// Verify sinks of the subgraph
- for (auto Block : UnknownBlocks) {
+ for (auto *Block : UnknownBlocks) {
if (Block->SuccJumps.empty()) {
// If there are multiple (known and unknown) sinks, we can't rebalance
if (DstBlock != nullptr)
@@ -868,7 +878,7 @@ private:
continue;
}
size_t NumIgnoredJumps = 0;
- for (auto Jump : Block->SuccJumps) {
+ for (auto *Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
NumIgnoredJumps++;
}
@@ -897,11 +907,11 @@ private:
return false;
// Ignore jumps out of SrcBlock to known blocks
- if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
+ if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
return true;
// Ignore jumps to known blocks with zero flow
- if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
+ if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
return true;
return false;
@@ -914,14 +924,14 @@ private:
// Extract local in-degrees in the considered subgraph
auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
auto fillInDegree = [&](const FlowBlock *Block) {
- for (auto Jump : Block->SuccJumps) {
+ for (auto *Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
LocalInDegree[Jump->Target]++;
}
};
fillInDegree(SrcBlock);
- for (auto Block : UnknownBlocks) {
+ for (auto *Block : UnknownBlocks) {
fillInDegree(Block);
}
// A loop containing SrcBlock
@@ -939,11 +949,11 @@ private:
break;
// Keep an acyclic order of unknown blocks
- if (Block->UnknownWeight && Block != SrcBlock)
+ if (Block->HasUnknownWeight && Block != SrcBlock)
AcyclicOrder.push_back(Block);
// Add to the queue all successors with zero local in-degree
- for (auto Jump : Block->SuccJumps) {
+ for (auto *Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
uint64_t Dst = Jump->Target;
@@ -972,7 +982,7 @@ private:
// Distribute flow from the source block
uint64_t BlockFlow = 0;
// SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
- for (auto Jump : SrcBlock->SuccJumps) {
+ for (auto *Jump : SrcBlock->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
BlockFlow += Jump->Flow;
@@ -980,11 +990,11 @@ private:
rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
// Distribute flow from the remaining blocks
- for (auto Block : UnknownBlocks) {
- assert(Block->UnknownWeight && "incorrect unknown subgraph");
+ for (auto *Block : UnknownBlocks) {
+ assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
uint64_t BlockFlow = 0;
// Block's flow is the sum of incoming flows
- for (auto Jump : Block->PredJumps) {
+ for (auto *Jump : Block->PredJumps) {
BlockFlow += Jump->Flow;
}
Block->Flow = BlockFlow;
@@ -998,7 +1008,7 @@ private:
const FlowBlock *Block, uint64_t BlockFlow) {
// Process all successor jumps and update corresponding flow values
size_t BlockDegree = 0;
- for (auto Jump : Block->SuccJumps) {
+ for (auto *Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
BlockDegree++;
@@ -1011,7 +1021,7 @@ private:
// Each of the Block's successors gets the following amount of flow.
// Rounding the value up so that all flow is propagated
uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
- for (auto Jump : Block->SuccJumps) {
+ for (auto *Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
uint64_t Flow = std::min(SuccFlow, BlockFlow);
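The division above rounds up, so no flow is lost to truncation; a hypothetical example with 10 units of flow and three non-ignored successors:

// BlockFlow = 10, BlockDegree = 3
// SuccFlow  = (10 + 3 - 1) / 3 = 4
// each successor gets min(SuccFlow, flow still to distribute): 4, 4, then 2
// total propagated = 4 + 4 + 2 = 10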
@@ -1023,104 +1033,88 @@ private:
/// A constant indicating an arbitrary exit block of a function.
static constexpr uint64_t AnyExitBlock = uint64_t(-1);
+ /// Minimum BaseDistance for the jump distance values in island joining.
+ static constexpr uint64_t MinBaseDistance = 10000;
+ /// Params for flow computation.
+ const ProfiParams &Params;
/// The function.
FlowFunction &Func;
};
+std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
+ const FlowBlock &Block);
+std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
+ const FlowJump &Jump);
+
/// Initializing flow network for a given function.
///
-/// Every block is split into three nodes that are responsible for (i) an
-/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
+/// Every block is split into two nodes that are responsible for (i) an
+/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
/// reduction of the block weight.
-void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
+void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
+ FlowFunction &Func) {
uint64_t NumBlocks = Func.Blocks.size();
assert(NumBlocks > 1 && "Too few blocks in a function");
- LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
+ uint64_t NumJumps = Func.Jumps.size();
+ assert(NumJumps > 0 && "Too few jumps in a function");
- // Pre-process data: make sure the entry weight is at least 1
- if (Func.Blocks[Func.Entry].Weight == 0) {
- Func.Blocks[Func.Entry].Weight = 1;
- }
// Introducing dummy source/sink pairs to allow flow circulation.
- // The nodes corresponding to blocks of Func have indicies in the range
- // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
- uint64_t S = 3 * NumBlocks;
+ // The nodes corresponding to blocks of the function have indices in
+ // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
+ // next four values.
+ uint64_t S = 2 * NumBlocks;
uint64_t T = S + 1;
uint64_t S1 = S + 2;
uint64_t T1 = S + 3;
- Network.initialize(3 * NumBlocks + 4, S1, T1);
+ Network.initialize(2 * NumBlocks + 4, S1, T1);
- // Create three nodes for every block of the function
+ // Initialize nodes of the flow network
for (uint64_t B = 0; B < NumBlocks; B++) {
auto &Block = Func.Blocks[B];
- assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
- "non-zero weight of a block w/o weight except for an entry");
- // Split every block into two nodes
- uint64_t Bin = 3 * B;
- uint64_t Bout = 3 * B + 1;
- uint64_t Baux = 3 * B + 2;
- if (Block.Weight > 0) {
- Network.addEdge(S1, Bout, Block.Weight, 0);
- Network.addEdge(Bin, T1, Block.Weight, 0);
- }
+ // Split every block into two auxiliary nodes to allow
+ // increase/reduction of the block count.
+ uint64_t Bin = 2 * B;
+ uint64_t Bout = 2 * B + 1;
// Edges from S and to T
- assert((!Block.isEntry() || !Block.isExit()) &&
- "a block cannot be an entry and an exit");
if (Block.isEntry()) {
Network.addEdge(S, Bin, 0);
} else if (Block.isExit()) {
Network.addEdge(Bout, T, 0);
}
- // An auxiliary node to allow increase/reduction of block counts:
- // We assume that decreasing block counts is more expensive than increasing,
- // and thus, setting separate costs here. In the future we may want to tune
- // the relative costs so as to maximize the quality of generated profiles.
- int64_t AuxCostInc = SampleProfileProfiCostInc;
- int64_t AuxCostDec = SampleProfileProfiCostDec;
- if (Block.UnknownWeight) {
- // Do not penalize changing weights of blocks w/o known profile count
- AuxCostInc = 0;
- AuxCostDec = 0;
- } else {
- // Increasing the count for "cold" blocks with zero initial count is more
- // expensive than for "hot" ones
- if (Block.Weight == 0) {
- AuxCostInc = SampleProfileProfiCostIncZero;
- }
- // Modifying the count of the entry block is expensive
- if (Block.isEntry()) {
- AuxCostInc = SampleProfileProfiCostIncEntry;
- AuxCostDec = SampleProfileProfiCostDecEntry;
- }
- }
- // For blocks with self-edges, do not penalize a reduction of the count,
- // as all of the increase can be attributed to the self-edge
- if (Block.HasSelfEdge) {
- AuxCostDec = 0;
- }
+ // Assign costs for increasing/decreasing the block counts
+ auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
- Network.addEdge(Bin, Baux, AuxCostInc);
- Network.addEdge(Baux, Bout, AuxCostInc);
+ // Add the corresponding edges to the network
+ Network.addEdge(Bin, Bout, AuxCostInc);
if (Block.Weight > 0) {
- Network.addEdge(Bout, Baux, AuxCostDec);
- Network.addEdge(Baux, Bin, AuxCostDec);
+ Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
+ Network.addEdge(S1, Bout, Block.Weight, 0);
+ Network.addEdge(Bin, T1, Block.Weight, 0);
}
}
- // Creating edges for every jump
- for (auto &Jump : Func.Jumps) {
- uint64_t Src = Jump.Source;
- uint64_t Dst = Jump.Target;
- if (Src != Dst) {
- uint64_t SrcOut = 3 * Src + 1;
- uint64_t DstIn = 3 * Dst;
- uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
- Network.addEdge(SrcOut, DstIn, Cost);
+ // Initialize edges of the flow network
+ for (uint64_t J = 0; J < NumJumps; J++) {
+ auto &Jump = Func.Jumps[J];
+
+ // Get the endpoints corresponding to the jump
+ uint64_t Jin = 2 * Jump.Source + 1;
+ uint64_t Jout = 2 * Jump.Target;
+
+ // Assign costs for increasing/decreasing the jump counts
+ auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
+
+ // Add the corresponding edges to the network
+ Network.addEdge(Jin, Jout, AuxCostInc);
+ if (Jump.Weight > 0) {
+ Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
+ Network.addEdge(S1, Jout, Jump.Weight, 0);
+ Network.addEdge(Jin, T1, Jump.Weight, 0);
}
}
@@ -1128,55 +1122,130 @@ void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
Network.addEdge(T, S, 0);
}
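The node numbering used above can be summarized with two hypothetical helpers (illustrative names, not part of the file):

inline uint64_t blockInNode(uint64_t B) { return 2 * B; }      // receives a block's incoming flow
inline uint64_t blockOutNode(uint64_t B) { return 2 * B + 1; } // emits a block's outgoing flow
// Dummy nodes for a function with NumBlocks blocks:
//   S  = 2 * NumBlocks,  T  = S + 1   (flow circulation via the T -> S edge)
//   S1 = S + 2,          T1 = S + 3   (the actual source/sink passed to initialize)
// A CFG jump Src -> Dst becomes the network edge blockOutNode(Src) -> blockInNode(Dst).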
-/// Extract resulting block and edge counts from the flow network.
-void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
- uint64_t NumBlocks = Func.Blocks.size();
-
- // Extract resulting block counts
- for (uint64_t Src = 0; Src < NumBlocks; Src++) {
- auto &Block = Func.Blocks[Src];
- uint64_t SrcOut = 3 * Src + 1;
- int64_t Flow = 0;
- for (auto &Adj : Network.getFlow(SrcOut)) {
- uint64_t DstIn = Adj.first;
- int64_t DstFlow = Adj.second;
- bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
- if (!IsAuxNode || Block.HasSelfEdge) {
- Flow += DstFlow;
- }
+/// Assign costs for increasing/decreasing the block counts.
+std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
+ const FlowBlock &Block) {
+ // Modifying the weight of an unlikely block is expensive
+ if (Block.IsUnlikely)
+ return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
+
+ // Assign default values for the costs
+ int64_t CostInc = Params.CostBlockInc;
+ int64_t CostDec = Params.CostBlockDec;
+ // Update the costs depending on the block metadata
+ if (Block.HasUnknownWeight) {
+ CostInc = Params.CostBlockUnknownInc;
+ CostDec = 0;
+ } else {
+ // Increasing the count for "cold" blocks with zero initial count is more
+ // expensive than for "hot" ones
+ if (Block.Weight == 0)
+ CostInc = Params.CostBlockZeroInc;
+ // Modifying the count of the entry block is expensive
+ if (Block.isEntry()) {
+ CostInc = Params.CostBlockEntryInc;
+ CostDec = Params.CostBlockEntryDec;
}
- Block.Flow = Flow;
- assert(Flow >= 0 && "negative block flow");
}
+ return std::make_pair(CostInc, CostDec);
+}
+
+/// Assign costs for increasing/decreasing the jump counts.
+std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
+ const FlowJump &Jump) {
+ // Modifying the weight of an unlikely jump is expensive
+ if (Jump.IsUnlikely)
+ return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
+
+ // Assign default values for the costs
+ int64_t CostInc = Params.CostJumpInc;
+ int64_t CostDec = Params.CostJumpDec;
+ // Update the costs depending on the jump metadata
+ if (Jump.Source + 1 == Jump.Target) {
+ // Adjusting the fall-through branch
+ CostInc = Params.CostJumpFTInc;
+ CostDec = Params.CostJumpFTDec;
+ }
+ if (Jump.HasUnknownWeight) {
+ // The cost is different for fall-through and non-fall-through branches
+ if (Jump.Source + 1 == Jump.Target)
+ CostInc = Params.CostJumpUnknownFTInc;
+ else
+ CostInc = Params.CostJumpUnknownInc;
+ CostDec = 0;
+ } else {
+ assert(Jump.Weight > 0 && "found a known-weight jump with zero weight");
+ }
+ return std::make_pair(CostInc, CostDec);
+}
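Assuming the block-cost defaults registered at the top of this file are forwarded into ProfiParams unchanged (as the wrapper at the end of the diff does), assignBlockCosts resolves to the following (CostInc, CostDec) pairs; the jump-cost defaults live in ProfiParams and are not shown here:

// regular block, Weight > 0        -> (10, 20)
// regular block, Weight == 0       -> (11, 20)   via CostBlockZeroInc
// entry block with a known weight  -> (40, 10)   via CostBlockEntryInc/Dec
// block with HasUnknownWeight      -> ( 0,  0)   via CostBlockUnknownInc; free to adjust
// block with IsUnlikely            -> (CostUnlikely, CostUnlikely)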
+
+/// Extract resulting block and edge counts from the flow network.
+void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
+ FlowFunction &Func) {
+ uint64_t NumBlocks = Func.Blocks.size();
+ uint64_t NumJumps = Func.Jumps.size();
// Extract resulting jump counts
- for (auto &Jump : Func.Jumps) {
- uint64_t Src = Jump.Source;
- uint64_t Dst = Jump.Target;
+ for (uint64_t J = 0; J < NumJumps; J++) {
+ auto &Jump = Func.Jumps[J];
+ uint64_t SrcOut = 2 * Jump.Source + 1;
+ uint64_t DstIn = 2 * Jump.Target;
+
int64_t Flow = 0;
- if (Src != Dst) {
- uint64_t SrcOut = 3 * Src + 1;
- uint64_t DstIn = 3 * Dst;
- Flow = Network.getFlow(SrcOut, DstIn);
- } else {
- uint64_t SrcOut = 3 * Src + 1;
- uint64_t SrcAux = 3 * Src + 2;
- int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
- if (AuxFlow > 0)
- Flow = AuxFlow;
- }
+ int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
+ if (Jump.Source != Jump.Target)
+ Flow = int64_t(Jump.Weight) + AuxFlow;
+ else
+ Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
+
Jump.Flow = Flow;
assert(Flow >= 0 && "negative jump flow");
}
+
+ // Extract resulting block counts
+ auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
+ auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
+ for (auto &Jump : Func.Jumps) {
+ InFlow[Jump.Target] += Jump.Flow;
+ OutFlow[Jump.Source] += Jump.Flow;
+ }
+ for (uint64_t B = 0; B < NumBlocks; B++) {
+ auto &Block = Func.Blocks[B];
+ Block.Flow = std::max(OutFlow[B], InFlow[B]);
+ }
}
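As a hypothetical illustration of the max above: entry and exit blocks have only one side of the sum, so taking the larger of the two gives every block its full count.

// entry block E, one outgoing jump with Flow = 7:
//   InFlow[E] = 0, OutFlow[E] = 7   -> E.Flow = max(7, 0) = 7
// exit block X, incoming jumps with Flow = 3 and Flow = 4:
//   InFlow[X] = 7, OutFlow[X] = 0   -> X.Flow = max(0, 7) = 7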
#ifndef NDEBUG
-/// Verify that the computed flow values satisfy flow conservation rules
-void verifyWeights(const FlowFunction &Func) {
+/// Verify that the provided block/jump weights are as expected.
+void verifyInput(const FlowFunction &Func) {
+ // Verify the entry block
+ assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
+ for (size_t I = 1; I < Func.Blocks.size(); I++) {
+ assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
+ }
+ // Verify CFG jumps
+ for (auto &Block : Func.Blocks) {
+ assert((!Block.isEntry() || !Block.isExit()) &&
+ "a block cannot be an entry and an exit");
+ }
+ // Verify input block weights
+ for (auto &Block : Func.Blocks) {
+ assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
+ "non-zero weight of a block w/o weight except for an entry");
+ }
+ // Verify input jump weights
+ for (auto &Jump : Func.Jumps) {
+ assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
+ "non-zero weight of a jump w/o weight");
+ }
+}
+
+/// Verify that the computed flow values satisfy flow conservation rules.
+void verifyOutput(const FlowFunction &Func) {
const uint64_t NumBlocks = Func.Blocks.size();
auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
- for (auto &Jump : Func.Jumps) {
+ for (const auto &Jump : Func.Jumps) {
InFlow[Jump.Target] += Jump.Flow;
OutFlow[Jump.Source] += Jump.Flow;
}
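The conservation property being checked can be stated compactly; this is a sketch of the invariant rather than the exact assert text:

// for every block B that is neither the entry nor an exit:
//   sum of Jump.Flow over B.PredJumps == B.Flow == sum of Jump.Flow over B.SuccJumps
// the entry block drops the predecessor equality, exit blocks drop the successor one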
@@ -1202,7 +1271,7 @@ void verifyWeights(const FlowFunction &Func) {
// One could modify FlowFunction to hold edges indexed by the sources, which
// will avoid a creation of the object
auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
- for (auto &Jump : Func.Jumps) {
+ for (const auto &Jump : Func.Jumps) {
if (Jump.Flow > 0) {
PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
}
@@ -1235,22 +1304,44 @@ void verifyWeights(const FlowFunction &Func) {
} // end of anonymous namespace
-/// Apply the profile inference algorithm for a given flow function
-void llvm::applyFlowInference(FlowFunction &Func) {
+/// Apply the profile inference algorithm for a given function
+void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
+#ifndef NDEBUG
+ // Verify the input data
+ verifyInput(Func);
+#endif
+
// Create and apply an inference network model
- auto InferenceNetwork = MinCostMaxFlow();
- initializeNetwork(InferenceNetwork, Func);
+ auto InferenceNetwork = MinCostMaxFlow(Params);
+ initializeNetwork(Params, InferenceNetwork, Func);
InferenceNetwork.run();
// Extract flow values for every block and every edge
- extractWeights(InferenceNetwork, Func);
+ extractWeights(Params, InferenceNetwork, Func);
// Post-processing adjustments to the flow
- auto Adjuster = FlowAdjuster(Func);
+ auto Adjuster = FlowAdjuster(Params, Func);
Adjuster.run();
#ifndef NDEBUG
// Verify the result
- verifyWeights(Func);
+ verifyOutput(Func);
#endif
}
+
+/// Apply the profile inference algorithm for a given flow function
+void llvm::applyFlowInference(FlowFunction &Func) {
+ ProfiParams Params;
+ // Set the params from the command-line flags.
+ Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
+ Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
+ Params.JoinIslands = SampleProfileJoinIslands;
+ Params.CostBlockInc = SampleProfileProfiCostBlockInc;
+ Params.CostBlockDec = SampleProfileProfiCostBlockDec;
+ Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
+ Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
+ Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
+ Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
+
+ applyFlowInference(Params, Func);
+}
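A hedged sketch of driving the new parameterized entry point directly; in practice the FlowFunction is constructed by the sample-profile loader, and only fields that appear in this diff are referenced:

ProfiParams Params;
Params.EvenFlowDistribution = true;
Params.JoinIslands = true;
Params.RebalanceUnknown = true;
Params.CostBlockInc = 10;
Params.CostBlockDec = 20;

FlowFunction Func;
// ... populate Func.Blocks, Func.Jumps and Func.Entry from the CFG ...
applyFlowInference(Params, Func);
// On return, every FlowBlock::Flow and FlowJump::Flow holds an inferred count
// satisfying the conservation rules checked by verifyOutput().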