diff options
Diffstat (limited to 'lib/Transforms/IPO/PartialInlining.cpp')
-rw-r--r-- | lib/Transforms/IPO/PartialInlining.cpp | 82 |
1 files changed, 31 insertions, 51 deletions
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index a9cfd8ded6fb..4907e4b30519 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -202,10 +202,8 @@ struct PartialInlinerImpl { std::function<AssumptionCache &(Function &)> *GetAC, std::function<TargetTransformInfo &(Function &)> *GTTI, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI, - ProfileSummaryInfo *ProfSI, - std::function<OptimizationRemarkEmitter &(Function &)> *GORE) - : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI), - GetORE(GORE) {} + ProfileSummaryInfo *ProfSI) + : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} bool run(Module &M); // Main part of the transformation that calls helper functions to find @@ -217,7 +215,7 @@ struct PartialInlinerImpl { // outline function due to code size. std::pair<bool, Function *> unswitchFunction(Function *F); - // This class speculatively clones the the function to be partial inlined. + // This class speculatively clones the function to be partial inlined. // At the end of partial inlining, the remaining callsites to the cloned // function that are not partially inlined will be fixed up to reference // the original function, and the cloned function will be erased. @@ -271,7 +269,6 @@ private: std::function<TargetTransformInfo &(Function &)> *GetTTI; Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI; ProfileSummaryInfo *PSI; - std::function<OptimizationRemarkEmitter &(Function &)> *GetORE; // Return the frequency of the OutlininingBB relative to F's entry point. // The result is no larger than 1 and is represented using BP. @@ -282,7 +279,8 @@ private: // Return true if the callee of CS should be partially inlined with // profit. bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, - BlockFrequency WeightedOutliningRcost); + BlockFrequency WeightedOutliningRcost, + OptimizationRemarkEmitter &ORE); // Try to inline DuplicateFunction (cloned from F with call to // the OutlinedFunction into its callers. Return true @@ -337,7 +335,7 @@ private: std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); std::unique_ptr<FunctionOutliningMultiRegionInfo> - computeOutliningColdRegionsInfo(Function *F); + computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE); }; struct PartialInlinerLegacyPass : public ModulePass { @@ -362,7 +360,6 @@ struct PartialInlinerLegacyPass : public ModulePass { &getAnalysis<TargetTransformInfoWrapperPass>(); ProfileSummaryInfo *PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - std::unique_ptr<OptimizationRemarkEmitter> UPORE; std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & { @@ -374,14 +371,7 @@ struct PartialInlinerLegacyPass : public ModulePass { return TTIWP->getTTI(F); }; - std::function<OptimizationRemarkEmitter &(Function &)> GetORE = - [&UPORE](Function &F) -> OptimizationRemarkEmitter & { - UPORE.reset(new OptimizationRemarkEmitter(&F)); - return *UPORE.get(); - }; - - return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI, - &GetORE) + return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI) .run(M); } }; @@ -389,7 +379,8 @@ struct PartialInlinerLegacyPass : public ModulePass { } // end anonymous namespace std::unique_ptr<FunctionOutliningMultiRegionInfo> -PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) { +PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F, + OptimizationRemarkEmitter &ORE) { BasicBlock *EntryBlock = &F->front(); DominatorTree DT(*F); @@ -403,8 +394,6 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) { } else BFI = &(*GetBFI)(*F); - auto &ORE = (*GetORE)(*F); - // Return if we don't have profiling information. if (!PSI->hasInstrumentationProfile()) return std::unique_ptr<FunctionOutliningMultiRegionInfo>(); @@ -414,8 +403,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) { auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) { BasicBlock *Dom = BlockList.front(); - return BlockList.size() > 1 && - std::distance(pred_begin(Dom), pred_end(Dom)) == 1; + return BlockList.size() > 1 && pred_size(Dom) == 1; }; auto IsSingleExit = @@ -567,10 +555,6 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) { return is_contained(successors(BB), Succ); }; - auto SuccSize = [](BasicBlock *BB) { - return std::distance(succ_begin(BB), succ_end(BB)); - }; - auto IsReturnBlock = [](BasicBlock *BB) { TerminatorInst *TI = BB->getTerminator(); return isa<ReturnInst>(TI); @@ -607,7 +591,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) { if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks) break; - if (SuccSize(CurrEntry) != 2) + if (succ_size(CurrEntry) != 2) break; BasicBlock *Succ1 = *succ_begin(CurrEntry); @@ -681,7 +665,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) { // peeling off dominating blocks from the outlining region: while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) { BasicBlock *Cand = OutliningInfo->NonReturnBlock; - if (SuccSize(Cand) != 2) + if (succ_size(Cand) != 2) break; if (HasNonEntryPred(Cand)) @@ -766,19 +750,19 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { bool PartialInlinerImpl::shouldPartialInline( CallSite CS, FunctionCloner &Cloner, - BlockFrequency WeightedOutliningRcost) { + BlockFrequency WeightedOutliningRcost, + OptimizationRemarkEmitter &ORE) { using namespace ore; - if (SkipCostAnalysis) - return true; - Instruction *Call = CS.getInstruction(); Function *Callee = CS.getCalledFunction(); assert(Callee == Cloner.ClonedFunc); + if (SkipCostAnalysis) + return isInlineViable(*Callee); + Function *Caller = CS.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); - auto &ORE = (*GetORE)(*Caller); InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, *GetAssumptionCache, GetBFI, PSI, &ORE); @@ -1270,14 +1254,14 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) { if (F->user_begin() == F->user_end()) return {false, nullptr}; - auto &ORE = (*GetORE)(*F); + OptimizationRemarkEmitter ORE(F); // Only try to outline cold regions if we have a profile summary, which // implies we have profiling information. if (PSI->hasProfileSummary() && F->hasProfileData() && !DisableMultiRegionPartialInline) { std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI = - computeOutliningColdRegionsInfo(F); + computeOutliningColdRegionsInfo(F, ORE); if (OMRI) { FunctionCloner Cloner(F, OMRI.get(), ORE); @@ -1357,11 +1341,11 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { // inlining the function with outlining (The inliner uses the size increase to // model the cost of inlining a callee). if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) { - auto &ORE = (*GetORE)(*Cloner.OrigFunc); + OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc); DebugLoc DLoc; BasicBlock *Block; std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc); - ORE.emit([&]() { + OrigFuncORE.emit([&]() { return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", DLoc, Block) << ore::NV("Function", Cloner.OrigFunc) @@ -1384,7 +1368,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (CalleeEntryCount) computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap); - uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0); + uint64_t CalleeEntryCountV = + (CalleeEntryCount ? CalleeEntryCount.getCount() : 0); bool AnyInline = false; for (User *User : Users) { @@ -1393,11 +1378,10 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (IsLimitReached()) continue; - - if (!shouldPartialInline(CS, Cloner, WeightedRcost)) + OptimizationRemarkEmitter CallerORE(CS.getCaller()); + if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE)) continue; - auto &ORE = (*GetORE)(*CS.getCaller()); // Construct remark before doing the inlining, as after successful inlining // the callsite is removed. OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()); @@ -1412,7 +1396,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { : nullptr))) continue; - ORE.emit(OR); + CallerORE.emit(OR); // Now update the entry count: if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) { @@ -1433,9 +1417,10 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (AnyInline) { Cloner.IsFunctionInlined = true; if (CalleeEntryCount) - Cloner.OrigFunc->setEntryCount(CalleeEntryCountV); - auto &ORE = (*GetORE)(*Cloner.OrigFunc); - ORE.emit([&]() { + Cloner.OrigFunc->setEntryCount( + CalleeEntryCount.setCount(CalleeEntryCountV)); + OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc); + OrigFuncORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc) << "Partially inlined into at least one caller"; }); @@ -1517,14 +1502,9 @@ PreservedAnalyses PartialInlinerPass::run(Module &M, return FAM.getResult<TargetIRAnalysis>(F); }; - std::function<OptimizationRemarkEmitter &(Function &)> GetORE = - [&FAM](Function &F) -> OptimizationRemarkEmitter & { - return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - }; - ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); - if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE) + if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI) .run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); |