aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp424
1 files changed, 272 insertions, 152 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index 924a7d5fbd9c..ab1a9a79cad6 100644
--- a/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -1,16 +1,28 @@
//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// Outline cold regions to a separate function.
-// TODO: Update BFI and BPI
-// TODO: Add all the outlined functions to a separate section.
-//
+///
+/// \file
+/// The goal of hot/cold splitting is to improve the memory locality of code.
+/// The splitting pass does this by identifying cold blocks and moving them into
+/// separate functions.
+///
+/// When the splitting pass finds a cold block (referred to as "the sink"), it
+/// grows a maximal cold region around that block. The maximal region contains
+/// all blocks (post-)dominated by the sink [*]. In theory, these blocks are as
+/// cold as the sink. Once a region is found, it's split out of the original
+/// function provided it's profitable to do so.
+///
+/// [*] In practice, there is some added complexity because some blocks are not
+/// safe to extract.
+///
+/// TODO: Use the PM to get domtrees, and preserve BFI/BPI.
+/// TODO: Reorder outlined functions.
+///
//===----------------------------------------------------------------------===//
#include "llvm/ADT/PostOrderIterator.h"
@@ -53,7 +65,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -69,16 +80,12 @@ static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);
static cl::opt<int>
- MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
- cl::desc("Code size threshold for outlining within a "
- "single BB (as a multiple of TCC_Basic)"));
+ SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden,
+ cl::desc("Base penalty for splitting cold code (as a "
+ "multiple of TCC_Basic)"));
namespace {
-struct PostDomTree : PostDomTreeBase<BasicBlock> {
- PostDomTree(Function &F) { recalculate(F); }
-};
-
/// A sequence of basic blocks.
///
/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
@@ -101,13 +108,14 @@ bool blockEndsInUnreachable(const BasicBlock &BB) {
bool unlikelyExecuted(BasicBlock &BB) {
// Exception handling blocks are unlikely executed.
- if (BB.isEHPad())
+ if (BB.isEHPad() || isa<ResumeInst>(BB.getTerminator()))
return true;
- // The block is cold if it calls/invokes a cold function.
+ // The block is cold if it calls/invokes a cold function. However, do not
+ // mark sanitizer traps as cold.
for (Instruction &I : BB)
if (auto CS = CallSite(&I))
- if (CS.hasFnAttr(Attribute::Cold))
+ if (CS.hasFnAttr(Attribute::Cold) && !CS->getMetadata("nosanitize"))
return true;
// The block is cold if it has an unreachable terminator, unless it's
@@ -125,38 +133,39 @@ bool unlikelyExecuted(BasicBlock &BB) {
/// Check whether it's safe to outline \p BB.
static bool mayExtractBlock(const BasicBlock &BB) {
- return !BB.hasAddressTaken() && !BB.isEHPad();
-}
-
-/// Check whether \p Region is profitable to outline.
-static bool isProfitableToOutline(const BlockSequence &Region,
- TargetTransformInfo &TTI) {
- if (Region.size() > 1)
- return true;
-
- int Cost = 0;
- const BasicBlock &BB = *Region[0];
- for (const Instruction &I : BB) {
- if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
- continue;
-
- Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
-
- if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic))
- return true;
- }
- return false;
+ // EH pads are unsafe to outline because doing so breaks EH type tables. It
+ // follows that invoke instructions cannot be extracted, because CodeExtractor
+ // requires unwind destinations to be within the extraction region.
+ //
+ // Resumes that are not reachable from a cleanup landing pad are considered to
+ // be unreachable. It’s not safe to split them out either.
+ auto Term = BB.getTerminator();
+ return !BB.hasAddressTaken() && !BB.isEHPad() && !isa<InvokeInst>(Term) &&
+ !isa<ResumeInst>(Term);
}
-/// Mark \p F cold. Return true if it's changed.
-static bool markEntireFunctionCold(Function &F) {
- assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold");
+/// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
+/// If \p UpdateEntryCount is true (set when this is a new split function and
+/// module has profile data), set entry count to 0 to ensure treated as cold.
+/// Return true if the function is changed.
+static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) {
+ assert(!F.hasOptNone() && "Can't mark this cold");
bool Changed = false;
+ if (!F.hasFnAttribute(Attribute::Cold)) {
+ F.addFnAttr(Attribute::Cold);
+ Changed = true;
+ }
if (!F.hasFnAttribute(Attribute::MinSize)) {
F.addFnAttr(Attribute::MinSize);
Changed = true;
}
- // TODO: Move this function into a cold section.
+ if (UpdateEntryCount) {
+ // Set the entry count to 0 to ensure it is placed in the unlikely text
+ // section when function sections are enabled.
+ F.setEntryCount(0);
+ Changed = true;
+ }
+
return Changed;
}
@@ -165,24 +174,24 @@ public:
HotColdSplitting(ProfileSummaryInfo *ProfSI,
function_ref<BlockFrequencyInfo *(Function &)> GBFI,
function_ref<TargetTransformInfo &(Function &)> GTTI,
- std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
- : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
+ std::function<OptimizationRemarkEmitter &(Function &)> *GORE,
+ function_ref<AssumptionCache *(Function &)> LAC)
+ : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {}
bool run(Module &M);
private:
+ bool isFunctionCold(const Function &F) const;
bool shouldOutlineFrom(const Function &F) const;
- bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
- BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
- DominatorTree &DT, PostDomTree &PDT,
- OptimizationRemarkEmitter &ORE);
+ bool outlineColdRegions(Function &F, bool HasProfileSummary);
Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
- OptimizationRemarkEmitter &ORE, unsigned Count);
- SmallPtrSet<const Function *, 2> OutlinedFunctions;
+ OptimizationRemarkEmitter &ORE,
+ AssumptionCache *AC, unsigned Count);
ProfileSummaryInfo *PSI;
function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
function_ref<TargetTransformInfo &(Function &)> GetTTI;
std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
+ function_ref<AssumptionCache *(Function &)> LookupAC;
};
class HotColdSplittingLegacyPass : public ModulePass {
@@ -193,10 +202,10 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addUsedIfAvailable<AssumptionCacheTracker>();
}
bool runOnModule(Module &M) override;
@@ -204,59 +213,141 @@ public:
} // end anonymous namespace
-// Returns false if the function should not be considered for hot-cold split
-// optimization.
-bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
- // Do not try to outline again from an already outlined cold function.
- if (OutlinedFunctions.count(&F))
- return false;
+/// Check whether \p F is inherently cold.
+bool HotColdSplitting::isFunctionCold(const Function &F) const {
+ if (F.hasFnAttribute(Attribute::Cold))
+ return true;
- if (F.size() <= 2)
- return false;
+ if (F.getCallingConv() == CallingConv::Cold)
+ return true;
- // TODO: Consider only skipping functions marked `optnone` or `cold`.
+ if (PSI->isFunctionEntryCold(&F))
+ return true;
- if (F.hasAddressTaken())
- return false;
+ return false;
+}
+// Returns false if the function should not be considered for hot-cold split
+// optimization.
+bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
if (F.hasFnAttribute(Attribute::AlwaysInline))
return false;
if (F.hasFnAttribute(Attribute::NoInline))
return false;
- if (F.getCallingConv() == CallingConv::Cold)
+ if (F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeThread) ||
+ F.hasFnAttribute(Attribute::SanitizeMemory))
return false;
- if (PSI->isFunctionEntryCold(&F))
- return false;
return true;
}
+/// Get the benefit score of outlining \p Region.
+static int getOutliningBenefit(ArrayRef<BasicBlock *> Region,
+ TargetTransformInfo &TTI) {
+ // Sum up the code size costs of non-terminator instructions. Tight coupling
+ // with \ref getOutliningPenalty is needed to model the costs of terminators.
+ int Benefit = 0;
+ for (BasicBlock *BB : Region)
+ for (Instruction &I : BB->instructionsWithoutDebug())
+ if (&I != BB->getTerminator())
+ Benefit +=
+ TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+ return Benefit;
+}
+
+/// Get the penalty score for outlining \p Region.
+static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
+ unsigned NumInputs, unsigned NumOutputs) {
+ int Penalty = SplittingThreshold;
+ LLVM_DEBUG(dbgs() << "Applying penalty for splitting: " << Penalty << "\n");
+
+ // If the splitting threshold is set at or below zero, skip the usual
+ // profitability check.
+ if (SplittingThreshold <= 0)
+ return Penalty;
+
+ // The typical code size cost for materializing an argument for the outlined
+ // call.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
+ const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
+ Penalty += CostForArgMaterialization * NumInputs;
+
+ // The typical code size cost for an output alloca, its associated store, and
+ // its associated reload.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
+ const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
+ Penalty += CostForRegionOutput * NumOutputs;
+
+ // Find the number of distinct exit blocks for the region. Use a conservative
+ // check to determine whether control returns from the region.
+ bool NoBlocksReturn = true;
+ SmallPtrSet<BasicBlock *, 2> SuccsOutsideRegion;
+ for (BasicBlock *BB : Region) {
+ // If a block has no successors, only assume it does not return if it's
+ // unreachable.
+ if (succ_empty(BB)) {
+ NoBlocksReturn &= isa<UnreachableInst>(BB->getTerminator());
+ continue;
+ }
+
+ for (BasicBlock *SuccBB : successors(BB)) {
+ if (find(Region, SuccBB) == Region.end()) {
+ NoBlocksReturn = false;
+ SuccsOutsideRegion.insert(SuccBB);
+ }
+ }
+ }
+
+ // Apply a `noreturn` bonus.
+ if (NoBlocksReturn) {
+ LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
+ << " non-returning terminators\n");
+ Penalty -= Region.size();
+ }
+
+ // Apply a penalty for having more than one successor outside of the region.
+ // This penalty accounts for the switch needed in the caller.
+ if (!SuccsOutsideRegion.empty()) {
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
+ << " non-region successors\n");
+ Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
+ }
+
+ return Penalty;
+}
+
Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
DominatorTree &DT,
BlockFrequencyInfo *BFI,
TargetTransformInfo &TTI,
OptimizationRemarkEmitter &ORE,
+ AssumptionCache *AC,
unsigned Count) {
assert(!Region.empty());
// TODO: Pass BFI and BPI to update profile information.
CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
- /* BPI */ nullptr, /* AllowVarArgs */ false,
+ /* BPI */ nullptr, AC, /* AllowVarArgs */ false,
/* AllowAlloca */ false,
/* Suffix */ "cold." + std::to_string(Count));
+ // Perform a simple cost/benefit analysis to decide whether or not to permit
+ // splitting.
SetVector<Value *> Inputs, Outputs, Sinks;
CE.findInputsOutputs(Inputs, Outputs, Sinks);
-
- // Do not extract regions that have live exit variables.
- if (Outputs.size() > 0) {
- LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n");
+ int OutliningBenefit = getOutliningBenefit(Region, TTI);
+ int OutliningPenalty =
+ getOutliningPenalty(Region, Inputs.size(), Outputs.size());
+ LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
+ << ", penalty = " << OutliningPenalty << "\n");
+ if (OutliningBenefit <= OutliningPenalty)
return nullptr;
- }
- // TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function.
Function *OrigF = Region[0]->getParent();
if (Function *OutF = CE.extractCodeRegion()) {
User *U = *OutF->user_begin();
@@ -269,9 +360,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
}
CI->setIsNoInline();
- // Try to make the outlined code as small as possible on the assumption
- // that it's cold.
- markEntireFunctionCold(*OutF);
+ markFunctionCold(*OutF, BFI != nullptr);
LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
ORE.emit([&]() {
@@ -298,6 +387,8 @@ using BlockTy = std::pair<BasicBlock *, unsigned>;
namespace {
/// A maximal outlining region. This contains all blocks post-dominated by a
/// sink block, the sink block itself, and all blocks dominated by the sink.
+/// If sink-predecessors and sink-successors cannot be extracted in one region,
+/// the static constructor returns a list of suitable extraction regions.
class OutliningRegion {
/// A list of (block, score) pairs. A block's score is non-zero iff it's a
/// viable sub-region entry point. Blocks with higher scores are better entry
@@ -312,12 +403,9 @@ class OutliningRegion {
/// Whether the entire function is cold.
bool EntireFunctionCold = false;
- /// Whether or not \p BB could be the entry point of an extracted region.
- static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
-
/// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
- return isViableEntryPoint(BB) ? Score : 0;
+ return mayExtractBlock(BB) ? Score : 0;
}
/// These scores should be lower than the score for predecessor blocks,
@@ -333,21 +421,23 @@ public:
OutliningRegion(OutliningRegion &&) = default;
OutliningRegion &operator=(OutliningRegion &&) = default;
- static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
- const PostDomTree &PDT) {
- OutliningRegion ColdRegion;
-
+ static std::vector<OutliningRegion> create(BasicBlock &SinkBB,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT) {
+ std::vector<OutliningRegion> Regions;
SmallPtrSet<BasicBlock *, 4> RegionBlocks;
+ Regions.emplace_back();
+ OutliningRegion *ColdRegion = &Regions.back();
+
auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
RegionBlocks.insert(BB);
- ColdRegion.Blocks.emplace_back(BB, Score);
- assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
+ ColdRegion->Blocks.emplace_back(BB, Score);
};
// The ancestor farthest-away from SinkBB, and also post-dominated by it.
unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
- ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
+ ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
unsigned BestScore = SinkScore;
// Visit SinkBB's ancestors using inverse DFS.
@@ -360,8 +450,8 @@ public:
// If the predecessor is cold and has no predecessors, the entire
// function must be cold.
if (SinkPostDom && pred_empty(&PredBB)) {
- ColdRegion.EntireFunctionCold = true;
- return ColdRegion;
+ ColdRegion->EntireFunctionCold = true;
+ return Regions;
}
// If SinkBB does not post-dominate a predecessor, do not mark the
@@ -376,7 +466,7 @@ public:
// considered as entry points before the sink block.
unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
if (PredScore > BestScore) {
- ColdRegion.SuggestedEntryPoint = &PredBB;
+ ColdRegion->SuggestedEntryPoint = &PredBB;
BestScore = PredScore;
}
@@ -384,9 +474,19 @@ public:
++PredIt;
}
- // Add SinkBB to the cold region. It's considered as an entry point before
- // any sink-successor blocks.
- addBlockToRegion(&SinkBB, SinkScore);
+ // If the sink can be added to the cold region, do so. It's considered as
+ // an entry point before any sink-successor blocks.
+ //
+ // Otherwise, split cold sink-successor blocks using a separate region.
+ // This satisfies the requirement that all extraction blocks other than the
+ // first have predecessors within the extraction region.
+ if (mayExtractBlock(SinkBB)) {
+ addBlockToRegion(&SinkBB, SinkScore);
+ } else {
+ Regions.emplace_back();
+ ColdRegion = &Regions.back();
+ BestScore = 0;
+ }
// Find all successors of SinkBB dominated by SinkBB using DFS.
auto SuccIt = ++df_begin(&SinkBB);
@@ -407,7 +507,7 @@ public:
unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
if (SuccScore > BestScore) {
- ColdRegion.SuggestedEntryPoint = &SuccBB;
+ ColdRegion->SuggestedEntryPoint = &SuccBB;
BestScore = SuccScore;
}
@@ -415,7 +515,7 @@ public:
++SuccIt;
}
- return ColdRegion;
+ return Regions;
}
/// Whether this region has nothing to extract.
@@ -461,11 +561,7 @@ public:
};
} // namespace
-bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
- BlockFrequencyInfo *BFI,
- TargetTransformInfo &TTI,
- DominatorTree &DT, PostDomTree &PDT,
- OptimizationRemarkEmitter &ORE) {
+bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
bool Changed = false;
// The set of cold blocks.
@@ -479,17 +575,28 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
// the first region to contain a block.
ReversePostOrderTraversal<Function *> RPOT(&F);
+ // Calculate domtrees lazily. This reduces compile-time significantly.
+ std::unique_ptr<DominatorTree> DT;
+ std::unique_ptr<PostDominatorTree> PDT;
+
+ // Calculate BFI lazily (it's only used to query ProfileSummaryInfo). This
+ // reduces compile-time significantly. TODO: When we *do* use BFI, we should
+ // be able to salvage its domtrees instead of recomputing them.
+ BlockFrequencyInfo *BFI = nullptr;
+ if (HasProfileSummary)
+ BFI = GetBFI(F);
+
+ TargetTransformInfo &TTI = GetTTI(F);
+ OptimizationRemarkEmitter &ORE = (*GetORE)(F);
+ AssumptionCache *AC = LookupAC(F);
+
// Find all cold regions.
for (BasicBlock *BB : RPOT) {
- // Skip blocks which can't be outlined.
- if (!mayExtractBlock(*BB))
- continue;
-
// This block is already part of some outlining region.
if (ColdBlocks.count(BB))
continue;
- bool Cold = PSI.isColdBlock(BB, BFI) ||
+ bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
(EnableStaticAnalyis && unlikelyExecuted(*BB));
if (!Cold)
continue;
@@ -499,28 +606,35 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
BB->dump();
});
- auto Region = OutliningRegion::create(*BB, DT, PDT);
- if (Region.empty())
- continue;
+ if (!DT)
+ DT = make_unique<DominatorTree>(F);
+ if (!PDT)
+ PDT = make_unique<PostDominatorTree>(F);
- if (Region.isEntireFunctionCold()) {
- LLVM_DEBUG(dbgs() << "Entire function is cold\n");
- return markEntireFunctionCold(F);
- }
+ auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
+ for (OutliningRegion &Region : Regions) {
+ if (Region.empty())
+ continue;
- // If this outlining region intersects with another, drop the new region.
- //
- // TODO: It's theoretically possible to outline more by only keeping the
- // largest region which contains a block, but the extra bookkeeping to do
- // this is tricky/expensive.
- bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
- return !ColdBlocks.insert(Block.first).second;
- });
- if (RegionsOverlap)
- continue;
+ if (Region.isEntireFunctionCold()) {
+ LLVM_DEBUG(dbgs() << "Entire function is cold\n");
+ return markFunctionCold(F);
+ }
+
+ // If this outlining region intersects with another, drop the new region.
+ //
+ // TODO: It's theoretically possible to outline more by only keeping the
+ // largest region which contains a block, but the extra bookkeeping to do
+ // this is tricky/expensive.
+ bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
+ return !ColdBlocks.insert(Block.first).second;
+ });
+ if (RegionsOverlap)
+ continue;
- OutliningWorklist.emplace_back(std::move(Region));
- ++NumColdRegionsFound;
+ OutliningWorklist.emplace_back(std::move(Region));
+ ++NumColdRegionsFound;
+ }
}
// Outline single-entry cold regions, splitting up larger regions as needed.
@@ -529,26 +643,17 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
OutliningRegion Region = OutliningWorklist.pop_back_val();
assert(!Region.empty() && "Empty outlining region in worklist");
do {
- BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
- if (!isProfitableToOutline(SubRegion, TTI)) {
- LLVM_DEBUG({
- dbgs() << "Skipping outlining; not profitable to outline\n";
- SubRegion[0]->dump();
- });
- continue;
- }
-
+ BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
LLVM_DEBUG({
dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
for (BasicBlock *BB : SubRegion)
BB->dump();
});
- Function *Outlined =
- extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
+ Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC,
+ OutlinedFunctionID);
if (Outlined) {
++OutlinedFunctionID;
- OutlinedFunctions.insert(Outlined);
Changed = true;
}
} while (!Region.empty());
@@ -559,20 +664,31 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
bool HotColdSplitting::run(Module &M) {
bool Changed = false;
- OutlinedFunctions.clear();
- for (auto &F : M) {
+ bool HasProfileSummary = (M.getProfileSummary(/* IsCS */ false) != nullptr);
+ for (auto It = M.begin(), End = M.end(); It != End; ++It) {
+ Function &F = *It;
+
+ // Do not touch declarations.
+ if (F.isDeclaration())
+ continue;
+
+ // Do not modify `optnone` functions.
+ if (F.hasOptNone())
+ continue;
+
+ // Detect inherently cold functions and mark them as such.
+ if (isFunctionCold(F)) {
+ Changed |= markFunctionCold(F);
+ continue;
+ }
+
if (!shouldOutlineFrom(F)) {
LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
continue;
}
+
LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
- DominatorTree DT(F);
- PostDomTree PDT(F);
- PDT.recalculate(F);
- BlockFrequencyInfo *BFI = GetBFI(F);
- TargetTransformInfo &TTI = GetTTI(F);
- OptimizationRemarkEmitter &ORE = (*GetORE)(F);
- Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
+ Changed |= outlineColdRegions(F, HasProfileSummary);
}
return Changed;
}
@@ -594,17 +710,21 @@ bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
ORE.reset(new OptimizationRemarkEmitter(&F));
return *ORE.get();
};
+ auto LookupAC = [this](Function &F) -> AssumptionCache * {
+ if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
+ return ACT->lookupAssumptionCache(F);
+ return nullptr;
+ };
- return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
+ return HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M);
}
PreservedAnalyses
HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&FAM](Function &F) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(F);
+ auto LookupAC = [&FAM](Function &F) -> AssumptionCache * {
+ return FAM.getCachedResult<AssumptionAnalysis>(F);
};
auto GBFI = [&FAM](Function &F) {
@@ -625,7 +745,7 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
+ if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}