diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 424 |
1 files changed, 272 insertions, 152 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index 924a7d5fbd9c..ab1a9a79cad6 100644 --- a/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -1,16 +1,28 @@ //===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// Outline cold regions to a separate function. -// TODO: Update BFI and BPI -// TODO: Add all the outlined functions to a separate section. -// +/// +/// \file +/// The goal of hot/cold splitting is to improve the memory locality of code. +/// The splitting pass does this by identifying cold blocks and moving them into +/// separate functions. +/// +/// When the splitting pass finds a cold block (referred to as "the sink"), it +/// grows a maximal cold region around that block. The maximal region contains +/// all blocks (post-)dominated by the sink [*]. In theory, these blocks are as +/// cold as the sink. Once a region is found, it's split out of the original +/// function provided it's profitable to do so. +/// +/// [*] In practice, there is some added complexity because some blocks are not +/// safe to extract. +/// +/// TODO: Use the PM to get domtrees, and preserve BFI/BPI. +/// TODO: Reorder outlined functions. +/// //===----------------------------------------------------------------------===// #include "llvm/ADT/PostOrderIterator.h" @@ -53,7 +65,6 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -69,16 +80,12 @@ static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis", cl::init(true), cl::Hidden); static cl::opt<int> - MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden, - cl::desc("Code size threshold for outlining within a " - "single BB (as a multiple of TCC_Basic)")); + SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden, + cl::desc("Base penalty for splitting cold code (as a " + "multiple of TCC_Basic)")); namespace { -struct PostDomTree : PostDomTreeBase<BasicBlock> { - PostDomTree(Function &F) { recalculate(F); } -}; - /// A sequence of basic blocks. /// /// A 0-sized SmallVector is slightly cheaper to move than a std::vector. @@ -101,13 +108,14 @@ bool blockEndsInUnreachable(const BasicBlock &BB) { bool unlikelyExecuted(BasicBlock &BB) { // Exception handling blocks are unlikely executed. - if (BB.isEHPad()) + if (BB.isEHPad() || isa<ResumeInst>(BB.getTerminator())) return true; - // The block is cold if it calls/invokes a cold function. + // The block is cold if it calls/invokes a cold function. However, do not + // mark sanitizer traps as cold. for (Instruction &I : BB) if (auto CS = CallSite(&I)) - if (CS.hasFnAttr(Attribute::Cold)) + if (CS.hasFnAttr(Attribute::Cold) && !CS->getMetadata("nosanitize")) return true; // The block is cold if it has an unreachable terminator, unless it's @@ -125,38 +133,39 @@ bool unlikelyExecuted(BasicBlock &BB) { /// Check whether it's safe to outline \p BB. static bool mayExtractBlock(const BasicBlock &BB) { - return !BB.hasAddressTaken() && !BB.isEHPad(); -} - -/// Check whether \p Region is profitable to outline. -static bool isProfitableToOutline(const BlockSequence &Region, - TargetTransformInfo &TTI) { - if (Region.size() > 1) - return true; - - int Cost = 0; - const BasicBlock &BB = *Region[0]; - for (const Instruction &I : BB) { - if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator()) - continue; - - Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); - - if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic)) - return true; - } - return false; + // EH pads are unsafe to outline because doing so breaks EH type tables. It + // follows that invoke instructions cannot be extracted, because CodeExtractor + // requires unwind destinations to be within the extraction region. + // + // Resumes that are not reachable from a cleanup landing pad are considered to + // be unreachable. It’s not safe to split them out either. + auto Term = BB.getTerminator(); + return !BB.hasAddressTaken() && !BB.isEHPad() && !isa<InvokeInst>(Term) && + !isa<ResumeInst>(Term); } -/// Mark \p F cold. Return true if it's changed. -static bool markEntireFunctionCold(Function &F) { - assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold"); +/// Mark \p F cold. Based on this assumption, also optimize it for minimum size. +/// If \p UpdateEntryCount is true (set when this is a new split function and +/// module has profile data), set entry count to 0 to ensure treated as cold. +/// Return true if the function is changed. +static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) { + assert(!F.hasOptNone() && "Can't mark this cold"); bool Changed = false; + if (!F.hasFnAttribute(Attribute::Cold)) { + F.addFnAttr(Attribute::Cold); + Changed = true; + } if (!F.hasFnAttribute(Attribute::MinSize)) { F.addFnAttr(Attribute::MinSize); Changed = true; } - // TODO: Move this function into a cold section. + if (UpdateEntryCount) { + // Set the entry count to 0 to ensure it is placed in the unlikely text + // section when function sections are enabled. + F.setEntryCount(0); + Changed = true; + } + return Changed; } @@ -165,24 +174,24 @@ public: HotColdSplitting(ProfileSummaryInfo *ProfSI, function_ref<BlockFrequencyInfo *(Function &)> GBFI, function_ref<TargetTransformInfo &(Function &)> GTTI, - std::function<OptimizationRemarkEmitter &(Function &)> *GORE) - : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {} + std::function<OptimizationRemarkEmitter &(Function &)> *GORE, + function_ref<AssumptionCache *(Function &)> LAC) + : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {} bool run(Module &M); private: + bool isFunctionCold(const Function &F) const; bool shouldOutlineFrom(const Function &F) const; - bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI, - BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, - DominatorTree &DT, PostDomTree &PDT, - OptimizationRemarkEmitter &ORE); + bool outlineColdRegions(Function &F, bool HasProfileSummary); Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, unsigned Count); - SmallPtrSet<const Function *, 2> OutlinedFunctions; + OptimizationRemarkEmitter &ORE, + AssumptionCache *AC, unsigned Count); ProfileSummaryInfo *PSI; function_ref<BlockFrequencyInfo *(Function &)> GetBFI; function_ref<TargetTransformInfo &(Function &)> GetTTI; std::function<OptimizationRemarkEmitter &(Function &)> *GetORE; + function_ref<AssumptionCache *(Function &)> LookupAC; }; class HotColdSplittingLegacyPass : public ModulePass { @@ -193,10 +202,10 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<BlockFrequencyInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addUsedIfAvailable<AssumptionCacheTracker>(); } bool runOnModule(Module &M) override; @@ -204,59 +213,141 @@ public: } // end anonymous namespace -// Returns false if the function should not be considered for hot-cold split -// optimization. -bool HotColdSplitting::shouldOutlineFrom(const Function &F) const { - // Do not try to outline again from an already outlined cold function. - if (OutlinedFunctions.count(&F)) - return false; +/// Check whether \p F is inherently cold. +bool HotColdSplitting::isFunctionCold(const Function &F) const { + if (F.hasFnAttribute(Attribute::Cold)) + return true; - if (F.size() <= 2) - return false; + if (F.getCallingConv() == CallingConv::Cold) + return true; - // TODO: Consider only skipping functions marked `optnone` or `cold`. + if (PSI->isFunctionEntryCold(&F)) + return true; - if (F.hasAddressTaken()) - return false; + return false; +} +// Returns false if the function should not be considered for hot-cold split +// optimization. +bool HotColdSplitting::shouldOutlineFrom(const Function &F) const { if (F.hasFnAttribute(Attribute::AlwaysInline)) return false; if (F.hasFnAttribute(Attribute::NoInline)) return false; - if (F.getCallingConv() == CallingConv::Cold) + if (F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress) || + F.hasFnAttribute(Attribute::SanitizeThread) || + F.hasFnAttribute(Attribute::SanitizeMemory)) return false; - if (PSI->isFunctionEntryCold(&F)) - return false; return true; } +/// Get the benefit score of outlining \p Region. +static int getOutliningBenefit(ArrayRef<BasicBlock *> Region, + TargetTransformInfo &TTI) { + // Sum up the code size costs of non-terminator instructions. Tight coupling + // with \ref getOutliningPenalty is needed to model the costs of terminators. + int Benefit = 0; + for (BasicBlock *BB : Region) + for (Instruction &I : BB->instructionsWithoutDebug()) + if (&I != BB->getTerminator()) + Benefit += + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); + + return Benefit; +} + +/// Get the penalty score for outlining \p Region. +static int getOutliningPenalty(ArrayRef<BasicBlock *> Region, + unsigned NumInputs, unsigned NumOutputs) { + int Penalty = SplittingThreshold; + LLVM_DEBUG(dbgs() << "Applying penalty for splitting: " << Penalty << "\n"); + + // If the splitting threshold is set at or below zero, skip the usual + // profitability check. + if (SplittingThreshold <= 0) + return Penalty; + + // The typical code size cost for materializing an argument for the outlined + // call. + LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n"); + const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic; + Penalty += CostForArgMaterialization * NumInputs; + + // The typical code size cost for an output alloca, its associated store, and + // its associated reload. + LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n"); + const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic; + Penalty += CostForRegionOutput * NumOutputs; + + // Find the number of distinct exit blocks for the region. Use a conservative + // check to determine whether control returns from the region. + bool NoBlocksReturn = true; + SmallPtrSet<BasicBlock *, 2> SuccsOutsideRegion; + for (BasicBlock *BB : Region) { + // If a block has no successors, only assume it does not return if it's + // unreachable. + if (succ_empty(BB)) { + NoBlocksReturn &= isa<UnreachableInst>(BB->getTerminator()); + continue; + } + + for (BasicBlock *SuccBB : successors(BB)) { + if (find(Region, SuccBB) == Region.end()) { + NoBlocksReturn = false; + SuccsOutsideRegion.insert(SuccBB); + } + } + } + + // Apply a `noreturn` bonus. + if (NoBlocksReturn) { + LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size() + << " non-returning terminators\n"); + Penalty -= Region.size(); + } + + // Apply a penalty for having more than one successor outside of the region. + // This penalty accounts for the switch needed in the caller. + if (!SuccsOutsideRegion.empty()) { + LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size() + << " non-region successors\n"); + Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic; + } + + return Penalty; +} + Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, OptimizationRemarkEmitter &ORE, + AssumptionCache *AC, unsigned Count) { assert(!Region.empty()); // TODO: Pass BFI and BPI to update profile information. CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr, - /* BPI */ nullptr, /* AllowVarArgs */ false, + /* BPI */ nullptr, AC, /* AllowVarArgs */ false, /* AllowAlloca */ false, /* Suffix */ "cold." + std::to_string(Count)); + // Perform a simple cost/benefit analysis to decide whether or not to permit + // splitting. SetVector<Value *> Inputs, Outputs, Sinks; CE.findInputsOutputs(Inputs, Outputs, Sinks); - - // Do not extract regions that have live exit variables. - if (Outputs.size() > 0) { - LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n"); + int OutliningBenefit = getOutliningBenefit(Region, TTI); + int OutliningPenalty = + getOutliningPenalty(Region, Inputs.size(), Outputs.size()); + LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit + << ", penalty = " << OutliningPenalty << "\n"); + if (OutliningBenefit <= OutliningPenalty) return nullptr; - } - // TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function. Function *OrigF = Region[0]->getParent(); if (Function *OutF = CE.extractCodeRegion()) { User *U = *OutF->user_begin(); @@ -269,9 +360,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, } CI->setIsNoInline(); - // Try to make the outlined code as small as possible on the assumption - // that it's cold. - markEntireFunctionCold(*OutF); + markFunctionCold(*OutF, BFI != nullptr); LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF); ORE.emit([&]() { @@ -298,6 +387,8 @@ using BlockTy = std::pair<BasicBlock *, unsigned>; namespace { /// A maximal outlining region. This contains all blocks post-dominated by a /// sink block, the sink block itself, and all blocks dominated by the sink. +/// If sink-predecessors and sink-successors cannot be extracted in one region, +/// the static constructor returns a list of suitable extraction regions. class OutliningRegion { /// A list of (block, score) pairs. A block's score is non-zero iff it's a /// viable sub-region entry point. Blocks with higher scores are better entry @@ -312,12 +403,9 @@ class OutliningRegion { /// Whether the entire function is cold. bool EntireFunctionCold = false; - /// Whether or not \p BB could be the entry point of an extracted region. - static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); } - /// If \p BB is a viable entry point, return \p Score. Return 0 otherwise. static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) { - return isViableEntryPoint(BB) ? Score : 0; + return mayExtractBlock(BB) ? Score : 0; } /// These scores should be lower than the score for predecessor blocks, @@ -333,21 +421,23 @@ public: OutliningRegion(OutliningRegion &&) = default; OutliningRegion &operator=(OutliningRegion &&) = default; - static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT, - const PostDomTree &PDT) { - OutliningRegion ColdRegion; - + static std::vector<OutliningRegion> create(BasicBlock &SinkBB, + const DominatorTree &DT, + const PostDominatorTree &PDT) { + std::vector<OutliningRegion> Regions; SmallPtrSet<BasicBlock *, 4> RegionBlocks; + Regions.emplace_back(); + OutliningRegion *ColdRegion = &Regions.back(); + auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) { RegionBlocks.insert(BB); - ColdRegion.Blocks.emplace_back(BB, Score); - assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB"); + ColdRegion->Blocks.emplace_back(BB, Score); }; // The ancestor farthest-away from SinkBB, and also post-dominated by it. unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock); - ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr; + ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr; unsigned BestScore = SinkScore; // Visit SinkBB's ancestors using inverse DFS. @@ -360,8 +450,8 @@ public: // If the predecessor is cold and has no predecessors, the entire // function must be cold. if (SinkPostDom && pred_empty(&PredBB)) { - ColdRegion.EntireFunctionCold = true; - return ColdRegion; + ColdRegion->EntireFunctionCold = true; + return Regions; } // If SinkBB does not post-dominate a predecessor, do not mark the @@ -376,7 +466,7 @@ public: // considered as entry points before the sink block. unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength()); if (PredScore > BestScore) { - ColdRegion.SuggestedEntryPoint = &PredBB; + ColdRegion->SuggestedEntryPoint = &PredBB; BestScore = PredScore; } @@ -384,9 +474,19 @@ public: ++PredIt; } - // Add SinkBB to the cold region. It's considered as an entry point before - // any sink-successor blocks. - addBlockToRegion(&SinkBB, SinkScore); + // If the sink can be added to the cold region, do so. It's considered as + // an entry point before any sink-successor blocks. + // + // Otherwise, split cold sink-successor blocks using a separate region. + // This satisfies the requirement that all extraction blocks other than the + // first have predecessors within the extraction region. + if (mayExtractBlock(SinkBB)) { + addBlockToRegion(&SinkBB, SinkScore); + } else { + Regions.emplace_back(); + ColdRegion = &Regions.back(); + BestScore = 0; + } // Find all successors of SinkBB dominated by SinkBB using DFS. auto SuccIt = ++df_begin(&SinkBB); @@ -407,7 +507,7 @@ public: unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock); if (SuccScore > BestScore) { - ColdRegion.SuggestedEntryPoint = &SuccBB; + ColdRegion->SuggestedEntryPoint = &SuccBB; BestScore = SuccScore; } @@ -415,7 +515,7 @@ public: ++SuccIt; } - return ColdRegion; + return Regions; } /// Whether this region has nothing to extract. @@ -461,11 +561,7 @@ public: }; } // namespace -bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI, - BlockFrequencyInfo *BFI, - TargetTransformInfo &TTI, - DominatorTree &DT, PostDomTree &PDT, - OptimizationRemarkEmitter &ORE) { +bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { bool Changed = false; // The set of cold blocks. @@ -479,17 +575,28 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI, // the first region to contain a block. ReversePostOrderTraversal<Function *> RPOT(&F); + // Calculate domtrees lazily. This reduces compile-time significantly. + std::unique_ptr<DominatorTree> DT; + std::unique_ptr<PostDominatorTree> PDT; + + // Calculate BFI lazily (it's only used to query ProfileSummaryInfo). This + // reduces compile-time significantly. TODO: When we *do* use BFI, we should + // be able to salvage its domtrees instead of recomputing them. + BlockFrequencyInfo *BFI = nullptr; + if (HasProfileSummary) + BFI = GetBFI(F); + + TargetTransformInfo &TTI = GetTTI(F); + OptimizationRemarkEmitter &ORE = (*GetORE)(F); + AssumptionCache *AC = LookupAC(F); + // Find all cold regions. for (BasicBlock *BB : RPOT) { - // Skip blocks which can't be outlined. - if (!mayExtractBlock(*BB)) - continue; - // This block is already part of some outlining region. if (ColdBlocks.count(BB)) continue; - bool Cold = PSI.isColdBlock(BB, BFI) || + bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) || (EnableStaticAnalyis && unlikelyExecuted(*BB)); if (!Cold) continue; @@ -499,28 +606,35 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI, BB->dump(); }); - auto Region = OutliningRegion::create(*BB, DT, PDT); - if (Region.empty()) - continue; + if (!DT) + DT = make_unique<DominatorTree>(F); + if (!PDT) + PDT = make_unique<PostDominatorTree>(F); - if (Region.isEntireFunctionCold()) { - LLVM_DEBUG(dbgs() << "Entire function is cold\n"); - return markEntireFunctionCold(F); - } + auto Regions = OutliningRegion::create(*BB, *DT, *PDT); + for (OutliningRegion &Region : Regions) { + if (Region.empty()) + continue; - // If this outlining region intersects with another, drop the new region. - // - // TODO: It's theoretically possible to outline more by only keeping the - // largest region which contains a block, but the extra bookkeeping to do - // this is tricky/expensive. - bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) { - return !ColdBlocks.insert(Block.first).second; - }); - if (RegionsOverlap) - continue; + if (Region.isEntireFunctionCold()) { + LLVM_DEBUG(dbgs() << "Entire function is cold\n"); + return markFunctionCold(F); + } + + // If this outlining region intersects with another, drop the new region. + // + // TODO: It's theoretically possible to outline more by only keeping the + // largest region which contains a block, but the extra bookkeeping to do + // this is tricky/expensive. + bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) { + return !ColdBlocks.insert(Block.first).second; + }); + if (RegionsOverlap) + continue; - OutliningWorklist.emplace_back(std::move(Region)); - ++NumColdRegionsFound; + OutliningWorklist.emplace_back(std::move(Region)); + ++NumColdRegionsFound; + } } // Outline single-entry cold regions, splitting up larger regions as needed. @@ -529,26 +643,17 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI, OutliningRegion Region = OutliningWorklist.pop_back_val(); assert(!Region.empty() && "Empty outlining region in worklist"); do { - BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT); - if (!isProfitableToOutline(SubRegion, TTI)) { - LLVM_DEBUG({ - dbgs() << "Skipping outlining; not profitable to outline\n"; - SubRegion[0]->dump(); - }); - continue; - } - + BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT); LLVM_DEBUG({ dbgs() << "Hot/cold splitting attempting to outline these blocks:\n"; for (BasicBlock *BB : SubRegion) BB->dump(); }); - Function *Outlined = - extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID); + Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC, + OutlinedFunctionID); if (Outlined) { ++OutlinedFunctionID; - OutlinedFunctions.insert(Outlined); Changed = true; } } while (!Region.empty()); @@ -559,20 +664,31 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI, bool HotColdSplitting::run(Module &M) { bool Changed = false; - OutlinedFunctions.clear(); - for (auto &F : M) { + bool HasProfileSummary = (M.getProfileSummary(/* IsCS */ false) != nullptr); + for (auto It = M.begin(), End = M.end(); It != End; ++It) { + Function &F = *It; + + // Do not touch declarations. + if (F.isDeclaration()) + continue; + + // Do not modify `optnone` functions. + if (F.hasOptNone()) + continue; + + // Detect inherently cold functions and mark them as such. + if (isFunctionCold(F)) { + Changed |= markFunctionCold(F); + continue; + } + if (!shouldOutlineFrom(F)) { LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n"); continue; } + LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n"); - DominatorTree DT(F); - PostDomTree PDT(F); - PDT.recalculate(F); - BlockFrequencyInfo *BFI = GetBFI(F); - TargetTransformInfo &TTI = GetTTI(F); - OptimizationRemarkEmitter &ORE = (*GetORE)(F); - Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE); + Changed |= outlineColdRegions(F, HasProfileSummary); } return Changed; } @@ -594,17 +710,21 @@ bool HotColdSplittingLegacyPass::runOnModule(Module &M) { ORE.reset(new OptimizationRemarkEmitter(&F)); return *ORE.get(); }; + auto LookupAC = [this](Function &F) -> AssumptionCache * { + if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>()) + return ACT->lookupAssumptionCache(F); + return nullptr; + }; - return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M); + return HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M); } PreservedAnalyses HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); - std::function<AssumptionCache &(Function &)> GetAssumptionCache = - [&FAM](Function &F) -> AssumptionCache & { - return FAM.getResult<AssumptionAnalysis>(F); + auto LookupAC = [&FAM](Function &F) -> AssumptionCache * { + return FAM.getCachedResult<AssumptionAnalysis>(F); }; auto GBFI = [&FAM](Function &F) { @@ -625,7 +745,7 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) { ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); - if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M)) + if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } |