diff options
Diffstat (limited to 'llvm/lib/Support/BalancedPartitioning.cpp')
-rw-r--r-- | llvm/lib/Support/BalancedPartitioning.cpp | 337 |
1 file changed, 337 insertions, 0 deletions
diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp new file mode 100644 index 000000000000..113e9484f528 --- /dev/null +++ b/llvm/lib/Support/BalancedPartitioning.cpp @@ -0,0 +1,337 @@ +//===- BalancedPartitioning.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements BalancedPartitioning, a recursive balanced graph +// partitioning algorithm. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BalancedPartitioning.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/ThreadPool.h" + +using namespace llvm; +#define DEBUG_TYPE "balanced-partitioning" + +void BPFunctionNode::dump(raw_ostream &OS) const { + OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id, + make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket); +} + +template <typename Func> +void BalancedPartitioning::BPThreadPool::async(Func &&F) { +#if LLVM_ENABLE_THREADS + // This new thread could spawn more threads, so mark it as active + ++NumActiveThreads; + TheThreadPool.async([=]() { + // Run the task + F(); + + // This thread will no longer spawn new threads, so mark it as inactive + if (--NumActiveThreads == 0) { + // There are no more active threads, so mark as finished and notify + { + std::unique_lock<std::mutex> lock(mtx); + assert(!IsFinishedSpawning); + IsFinishedSpawning = true; + } + cv.notify_one(); + } + }); +#else + llvm_unreachable("threads are disabled"); +#endif +} + +void BalancedPartitioning::BPThreadPool::wait() { +#if 
LLVM_ENABLE_THREADS + // TODO: We could remove the mutex and condition variable and use + // std::atomic::wait() instead, but that isn't available until C++20 + { + std::unique_lock<std::mutex> lock(mtx); + cv.wait(lock, [&]() { return IsFinishedSpawning; }); + assert(IsFinishedSpawning && NumActiveThreads == 0); + } + // Now we can call ThreadPool::wait() since all tasks have been submitted + TheThreadPool.wait(); +#else + llvm_unreachable("threads are disabled"); +#endif +} + +BalancedPartitioning::BalancedPartitioning( + const BalancedPartitioningConfig &Config) + : Config(Config) { + // Pre-computing log2 values + Log2Cache[0] = 0.0; + for (unsigned I = 1; I < LOG_CACHE_SIZE; I++) + Log2Cache[I] = std::log2(I); +} + +void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const { + LLVM_DEBUG( + dbgs() << format( + "Partitioning %d nodes using depth %d and %d iterations per split\n", + Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit)); + std::optional<BPThreadPool> TP; +#if LLVM_ENABLE_THREADS + ThreadPool TheThreadPool; + if (Config.TaskSplitDepth > 1) + TP.emplace(TheThreadPool); +#endif + + // Record the input order + for (unsigned I = 0; I < Nodes.size(); I++) + Nodes[I].InputOrderIndex = I; + + auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end()); + auto BisectTask = [=, &TP]() { + bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP); + }; + if (TP) { + TP->async(std::move(BisectTask)); + TP->wait(); + } else { + BisectTask(); + } + + llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) { + return L.Bucket < R.Bucket; + }); + + LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n"); +} + +void BalancedPartitioning::bisect(const FunctionNodeRange Nodes, + unsigned RecDepth, unsigned RootBucket, + unsigned Offset, + std::optional<BPThreadPool> &TP) const { + unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); + if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) { + // We've reach the 
lowest level of the recursion tree. Fall back to the + // original order and assign to buckets. + llvm::stable_sort(Nodes, [](const auto &L, const auto &R) { + return L.InputOrderIndex < R.InputOrderIndex; + }); + for (auto &N : Nodes) + N.Bucket = Offset++; + return; + } + + LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n", + NumNodes, RootBucket)); + + std::mt19937 RNG(RootBucket); + + unsigned LeftBucket = 2 * RootBucket; + unsigned RightBucket = 2 * RootBucket + 1; + + // Split into two and assign to the left and right buckets + split(Nodes, LeftBucket); + + runIterations(Nodes, RecDepth, LeftBucket, RightBucket, RNG); + + // Split nodes wrt the resulting buckets + auto NodesMid = + llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; }); + unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid); + + auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid); + auto RightNodes = llvm::make_range(NodesMid, Nodes.end()); + + auto LeftRecTask = [=, &TP]() { + bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP); + }; + auto RightRecTask = [=, &TP]() { + bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP); + }; + + if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) { + TP->async(std::move(LeftRecTask)); + TP->async(std::move(RightRecTask)); + } else { + LeftRecTask(); + RightRecTask(); + } +} + +void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes, + unsigned RecDepth, unsigned LeftBucket, + unsigned RightBucket, + std::mt19937 &RNG) const { + unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); + DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree; + for (auto &N : Nodes) + for (auto &UN : N.UtilityNodes) + ++UtilityNodeDegree[UN]; + // Remove utility nodes if they have just one edge or are connected to all + // functions + for (auto &N : Nodes) + llvm::erase_if(N.UtilityNodes, [&](auto &UN) { + return UtilityNodeDegree[UN] <= 1 || 
UtilityNodeDegree[UN] >= NumNodes; + }); + + // Renumber utility nodes so they can be used to index into Signatures + DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex; + for (auto &N : Nodes) + for (auto &UN : N.UtilityNodes) + if (!UtilityNodeIndex.count(UN)) + UtilityNodeIndex[UN] = UtilityNodeIndex.size(); + for (auto &N : Nodes) + for (auto &UN : N.UtilityNodes) + UN = UtilityNodeIndex[UN]; + + // Initialize signatures + SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size()); + for (auto &N : Nodes) { + for (auto &UN : N.UtilityNodes) { + assert(UN < Signatures.size()); + if (N.Bucket == LeftBucket) { + Signatures[UN].LeftCount++; + } else { + Signatures[UN].RightCount++; + } + } + } + + for (unsigned I = 0; I < Config.IterationsPerSplit; I++) { + unsigned NumMovedNodes = + runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG); + if (NumMovedNodes == 0) + break; + } +} + +unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes, + unsigned LeftBucket, + unsigned RightBucket, + SignaturesT &Signatures, + std::mt19937 &RNG) const { + // Init signature cost caches + for (auto &Signature : Signatures) { + if (Signature.CachedGainIsValid) + continue; + unsigned L = Signature.LeftCount; + unsigned R = Signature.RightCount; + assert((L > 0 || R > 0) && "incorrect signature"); + float Cost = logCost(L, R); + Signature.CachedGainLR = 0.f; + Signature.CachedGainRL = 0.f; + if (L > 0) + Signature.CachedGainLR = Cost - logCost(L - 1, R + 1); + if (R > 0) + Signature.CachedGainRL = Cost - logCost(L + 1, R - 1); + Signature.CachedGainIsValid = true; + } + + // Compute move gains + typedef std::pair<float, BPFunctionNode *> GainPair; + std::vector<GainPair> Gains; + for (auto &N : Nodes) { + bool FromLeftToRight = (N.Bucket == LeftBucket); + float Gain = moveGain(N, FromLeftToRight, Signatures); + Gains.push_back(std::make_pair(Gain, &N)); + } + + // Collect left and right gains + auto LeftEnd = llvm::partition( + Gains, [&](const 
auto &GP) { return GP.second->Bucket == LeftBucket; }); + auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd); + auto RightRange = llvm::make_range(LeftEnd, Gains.end()); + + // Sort gains in descending order + auto LargerGain = [](const auto &L, const auto &R) { + return L.first > R.first; + }; + llvm::stable_sort(LeftRange, LargerGain); + llvm::stable_sort(RightRange, LargerGain); + + unsigned NumMovedDataVertices = 0; + for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) { + auto &[LeftGain, LeftNode] = LeftPair; + auto &[RightGain, RightNode] = RightPair; + // Stop when the gain is no longer beneficial + if (LeftGain + RightGain <= 0.f) + break; + // Try to exchange the nodes between buckets + if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG)) + ++NumMovedDataVertices; + if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG)) + ++NumMovedDataVertices; + } + return NumMovedDataVertices; +} + +bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N, + unsigned LeftBucket, + unsigned RightBucket, + SignaturesT &Signatures, + std::mt19937 &RNG) const { + // Sometimes we skip the move. This helps to escape local optima + if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <= + Config.SkipProbability) + return false; + + bool FromLeftToRight = (N.Bucket == LeftBucket); + // Update the current bucket + N.Bucket = (FromLeftToRight ? 
RightBucket : LeftBucket); + + // Update signatures and invalidate gain cache + if (FromLeftToRight) { + for (auto &UN : N.UtilityNodes) { + auto &Signature = Signatures[UN]; + Signature.LeftCount--; + Signature.RightCount++; + Signature.CachedGainIsValid = false; + } + } else { + for (auto &UN : N.UtilityNodes) { + auto &Signature = Signatures[UN]; + Signature.LeftCount++; + Signature.RightCount--; + Signature.CachedGainIsValid = false; + } + } + return true; +} + +void BalancedPartitioning::split(const FunctionNodeRange Nodes, + unsigned StartBucket) const { + unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); + auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2; + + std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) { + return L.InputOrderIndex < R.InputOrderIndex; + }); + + for (auto &N : llvm::make_range(Nodes.begin(), NodesMid)) + N.Bucket = StartBucket; + for (auto &N : llvm::make_range(NodesMid, Nodes.end())) + N.Bucket = StartBucket + 1; +} + +float BalancedPartitioning::moveGain(const BPFunctionNode &N, + bool FromLeftToRight, + const SignaturesT &Signatures) { + float Gain = 0.f; + for (auto &UN : N.UtilityNodes) + Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR + : Signatures[UN].CachedGainRL); + return Gain; +} + +float BalancedPartitioning::logCost(unsigned X, unsigned Y) const { + return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1)); +} + +float BalancedPartitioning::log2Cached(unsigned i) const { + return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i); +} |