aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Support/BalancedPartitioning.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Support/BalancedPartitioning.cpp')
-rw-r--r--llvm/lib/Support/BalancedPartitioning.cpp337
1 files changed, 337 insertions, 0 deletions
diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp
new file mode 100644
index 000000000000..113e9484f528
--- /dev/null
+++ b/llvm/lib/Support/BalancedPartitioning.cpp
@@ -0,0 +1,337 @@
+//===- BalancedPartitioning.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements BalancedPartitioning, a recursive balanced graph
+// partitioning algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BalancedPartitioning.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/ThreadPool.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "balanced-partitioning"
+
+void BPFunctionNode::dump(raw_ostream &OS) const {
+ OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id,
+ make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket);
+}
+
/// Enqueue \p F on the underlying thread pool while maintaining a count of
/// tasks that may still spawn further tasks.
///
/// A task counts as "active" from submission until its body returns. Because a
/// task can itself call async(), ThreadPool::wait() alone is not sufficient —
/// it could return while descendants are still being spawned. The last task to
/// finish sets IsFinishedSpawning and wakes up wait().
template <typename Func>
void BalancedPartitioning::BPThreadPool::async(Func &&F) {
#if LLVM_ENABLE_THREADS
  // This new thread could spawn more threads, so mark it as active
  ++NumActiveThreads;
  // NOTE(review): the [=] capture copies F into the closure; NumActiveThreads
  // is assumed to be an atomic counter (declared in the header) — confirm.
  TheThreadPool.async([=]() {
    // Run the task
    F();

    // This thread will no longer spawn new threads, so mark it as inactive
    if (--NumActiveThreads == 0) {
      // There are no more active threads, so mark as finished and notify.
      // Taking the lock before flipping the flag ensures wait() cannot miss
      // the notification between checking its predicate and blocking.
      {
        std::unique_lock<std::mutex> lock(mtx);
        assert(!IsFinishedSpawning);
        IsFinishedSpawning = true;
      }
      cv.notify_one();
    }
  });
#else
  llvm_unreachable("threads are disabled");
#endif
}
+
/// Block the calling thread until every task submitted via async() —
/// including tasks spawned transitively by those tasks — has completed.
void BalancedPartitioning::BPThreadPool::wait() {
#if LLVM_ENABLE_THREADS
  // TODO: We could remove the mutex and condition variable and use
  // std::atomic::wait() instead, but that isn't available until C++20
  {
    std::unique_lock<std::mutex> lock(mtx);
    // Sleep until the last active task flips IsFinishedSpawning (see async()).
    cv.wait(lock, [&]() { return IsFinishedSpawning; });
    assert(IsFinishedSpawning && NumActiveThreads == 0);
  }
  // Now we can call ThreadPool::wait() since all tasks have been submitted
  TheThreadPool.wait();
#else
  llvm_unreachable("threads are disabled");
#endif
}
+
+BalancedPartitioning::BalancedPartitioning(
+ const BalancedPartitioningConfig &Config)
+ : Config(Config) {
+ // Pre-computing log2 values
+ Log2Cache[0] = 0.0;
+ for (unsigned I = 1; I < LOG_CACHE_SIZE; I++)
+ Log2Cache[I] = std::log2(I);
+}
+
+void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const {
+ LLVM_DEBUG(
+ dbgs() << format(
+ "Partitioning %d nodes using depth %d and %d iterations per split\n",
+ Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit));
+ std::optional<BPThreadPool> TP;
+#if LLVM_ENABLE_THREADS
+ ThreadPool TheThreadPool;
+ if (Config.TaskSplitDepth > 1)
+ TP.emplace(TheThreadPool);
+#endif
+
+ // Record the input order
+ for (unsigned I = 0; I < Nodes.size(); I++)
+ Nodes[I].InputOrderIndex = I;
+
+ auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end());
+ auto BisectTask = [=, &TP]() {
+ bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP);
+ };
+ if (TP) {
+ TP->async(std::move(BisectTask));
+ TP->wait();
+ } else {
+ BisectTask();
+ }
+
+ llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) {
+ return L.Bucket < R.Bucket;
+ });
+
+ LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n");
+}
+
/// Split \p Nodes into two halves and recurse on each half, assigning final
/// buckets (starting at \p Offset) once the recursion bottoms out.
///
/// \param Nodes      the range of nodes to partition
/// \param RecDepth   current depth in the recursion tree
/// \param RootBucket bucket id of this subtree's root; the children use
///                   2*RootBucket and 2*RootBucket+1 (implicit binary tree)
/// \param Offset     first final bucket index available to this range
/// \param TP         optional thread pool for running subtasks in parallel
void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,
                                  unsigned RecDepth, unsigned RootBucket,
                                  unsigned Offset,
                                  std::optional<BPThreadPool> &TP) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {
    // We've reached the lowest level of the recursion tree. Fall back to the
    // original order and assign to buckets.
    llvm::stable_sort(Nodes, [](const auto &L, const auto &R) {
      return L.InputOrderIndex < R.InputOrderIndex;
    });
    for (auto &N : Nodes)
      N.Bucket = Offset++;
    return;
  }

  LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n",
                              NumNodes, RootBucket));

  // Seed the RNG with the bucket id so results are deterministic regardless
  // of thread scheduling.
  std::mt19937 RNG(RootBucket);

  unsigned LeftBucket = 2 * RootBucket;
  unsigned RightBucket = 2 * RootBucket + 1;

  // Split into two and assign to the left and right buckets
  split(Nodes, LeftBucket);

  // Iteratively improve the initial split by moving nodes between buckets.
  runIterations(Nodes, RecDepth, LeftBucket, RightBucket, RNG);

  // Split nodes wrt the resulting buckets
  auto NodesMid =
      llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; });
  unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid);

  auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid);
  auto RightNodes = llvm::make_range(NodesMid, Nodes.end());

  // The ranges are captured by value: they are iterator pairs into the
  // caller's storage, which outlives both tasks (the pool is drained before
  // run() returns).
  auto LeftRecTask = [=, &TP]() {
    bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP);
  };
  auto RightRecTask = [=, &TP]() {
    bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP);
  };

  // Spawn parallel subtasks only near the top of the tree and for ranges
  // large enough to amortize the scheduling overhead.
  if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) {
    TP->async(std::move(LeftRecTask));
    TP->async(std::move(RightRecTask));
  } else {
    LeftRecTask();
    RightRecTask();
  }
}
+
/// Refine a fresh left/right split of \p Nodes by running up to
/// Config.IterationsPerSplit local-search iterations, stopping early once an
/// iteration moves no nodes. RecDepth is currently unused here.
void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,
                                         unsigned RecDepth, unsigned LeftBucket,
                                         unsigned RightBucket,
                                         std::mt19937 &RNG) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  // Count, for each utility node, how many nodes in this range reference it.
  DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree;
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      ++UtilityNodeDegree[UN];
  // Remove utility nodes if they have just one edge or are connected to all
  // functions
  for (auto &N : Nodes)
    llvm::erase_if(N.UtilityNodes, [&](auto &UN) {
      return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes;
    });

  // Renumber utility nodes so they can be used to index into Signatures
  DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      if (!UtilityNodeIndex.count(UN))
        // C++17 sequences the RHS before the (inserting) subscript on the
        // LHS, so size() is read pre-insertion: indices form a dense 0..K-1.
        UtilityNodeIndex[UN] = UtilityNodeIndex.size();
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      UN = UtilityNodeIndex[UN];

  // Initialize signatures: per utility node, how many of its functions are
  // currently in the left and in the right bucket.
  SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size());
  for (auto &N : Nodes) {
    for (auto &UN : N.UtilityNodes) {
      assert(UN < Signatures.size());
      if (N.Bucket == LeftBucket) {
        Signatures[UN].LeftCount++;
      } else {
        Signatures[UN].RightCount++;
      }
    }
  }

  // Local search: stop as soon as an iteration makes no progress.
  for (unsigned I = 0; I < Config.IterationsPerSplit; I++) {
    unsigned NumMovedNodes =
        runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG);
    if (NumMovedNodes == 0)
      break;
  }
}
+
/// Run one local-search pass over \p Nodes: refresh the cached per-signature
/// move gains, sort the candidates of each bucket by gain, then walk the two
/// sorted lists in lock-step attempting to exchange the best remaining pair
/// while their combined gain stays positive.
/// \returns the number of nodes that actually moved.
unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Init signature cost caches
  for (auto &Signature : Signatures) {
    if (Signature.CachedGainIsValid)
      continue;
    unsigned L = Signature.LeftCount;
    unsigned R = Signature.RightCount;
    assert((L > 0 || R > 0) && "incorrect signature");
    float Cost = logCost(L, R);
    Signature.CachedGainLR = 0.f;
    Signature.CachedGainRL = 0.f;
    // Gain of moving one of this utility node's functions left-to-right (LR)
    // or right-to-left (RL): current cost minus cost after the move.
    if (L > 0)
      Signature.CachedGainLR = Cost - logCost(L - 1, R + 1);
    if (R > 0)
      Signature.CachedGainRL = Cost - logCost(L + 1, R - 1);
    Signature.CachedGainIsValid = true;
  }

  // Compute move gains
  typedef std::pair<float, BPFunctionNode *> GainPair;
  std::vector<GainPair> Gains;
  for (auto &N : Nodes) {
    bool FromLeftToRight = (N.Bucket == LeftBucket);
    float Gain = moveGain(N, FromLeftToRight, Signatures);
    Gains.push_back(std::make_pair(Gain, &N));
  }

  // Collect left and right gains
  auto LeftEnd = llvm::partition(
      Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; });
  auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd);
  auto RightRange = llvm::make_range(LeftEnd, Gains.end());

  // Sort gains in descending order
  auto LargerGain = [](const auto &L, const auto &R) {
    return L.first > R.first;
  };
  llvm::stable_sort(LeftRange, LargerGain);
  llvm::stable_sort(RightRange, LargerGain);

  unsigned NumMovedDataVertices = 0;
  // zip() stops at the shorter of the two lists, so each candidate is paired
  // with at most one candidate from the opposite bucket.
  for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) {
    auto &[LeftGain, LeftNode] = LeftPair;
    auto &[RightGain, RightNode] = RightPair;
    // Stop when the gain is no longer beneficial
    if (LeftGain + RightGain <= 0.f)
      break;
    // Try to exchange the nodes between buckets; either move may be skipped
    // randomly inside moveFunctionNode.
    if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
    if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
  }
  return NumMovedDataVertices;
}
+
+bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N,
+ unsigned LeftBucket,
+ unsigned RightBucket,
+ SignaturesT &Signatures,
+ std::mt19937 &RNG) const {
+ // Sometimes we skip the move. This helps to escape local optima
+ if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <=
+ Config.SkipProbability)
+ return false;
+
+ bool FromLeftToRight = (N.Bucket == LeftBucket);
+ // Update the current bucket
+ N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket);
+
+ // Update signatures and invalidate gain cache
+ if (FromLeftToRight) {
+ for (auto &UN : N.UtilityNodes) {
+ auto &Signature = Signatures[UN];
+ Signature.LeftCount--;
+ Signature.RightCount++;
+ Signature.CachedGainIsValid = false;
+ }
+ } else {
+ for (auto &UN : N.UtilityNodes) {
+ auto &Signature = Signatures[UN];
+ Signature.LeftCount++;
+ Signature.RightCount--;
+ Signature.CachedGainIsValid = false;
+ }
+ }
+ return true;
+}
+
+void BalancedPartitioning::split(const FunctionNodeRange Nodes,
+ unsigned StartBucket) const {
+ unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
+ auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;
+
+ std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) {
+ return L.InputOrderIndex < R.InputOrderIndex;
+ });
+
+ for (auto &N : llvm::make_range(Nodes.begin(), NodesMid))
+ N.Bucket = StartBucket;
+ for (auto &N : llvm::make_range(NodesMid, Nodes.end()))
+ N.Bucket = StartBucket + 1;
+}
+
+float BalancedPartitioning::moveGain(const BPFunctionNode &N,
+ bool FromLeftToRight,
+ const SignaturesT &Signatures) {
+ float Gain = 0.f;
+ for (auto &UN : N.UtilityNodes)
+ Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR
+ : Signatures[UN].CachedGainRL);
+ return Gain;
+}
+
+float BalancedPartitioning::logCost(unsigned X, unsigned Y) const {
+ return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1));
+}
+
+float BalancedPartitioning::log2Cached(unsigned i) const {
+ return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i);
+}