diff options
Diffstat (limited to 'lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r-- | lib/Transforms/IPO/SampleProfile.cpp | 66 |
1 files changed, 52 insertions, 14 deletions
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 9f123c2b875e..877d20e72ffc 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -1,9 +1,8 @@ //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -319,6 +318,14 @@ protected: /// Optimization Remark Emitter used to emit diagnostic remarks. OptimizationRemarkEmitter *ORE = nullptr; + + // Information recorded when we declined to inline a call site + // because we have determined it is too cold is accumulated for + // each callee function. Initially this is just the entry count. + struct NotInlinedProfileInfo { + uint64_t entryCount; + }; + DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -745,8 +752,9 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { // when cost exceeds threshold without checking all IRs in the callee. // The acutal cost does not matter because we only checks isNever() to // see if it is legal to inline the callsite. 
- InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC, - None, nullptr, nullptr); + InlineCost Cost = + getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC, + None, nullptr, nullptr); if (Cost.isNever()) { ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB) << "incompatible inlining"); @@ -779,6 +787,8 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { bool SampleProfileLoader::inlineHotFunctions( Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { DenseSet<Instruction *> PromotedInsns; + + DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites; bool Changed = false; while (true) { bool LocalChanged = false; @@ -791,6 +801,8 @@ bool SampleProfileLoader::inlineHotFunctions( if ((isa<CallInst>(I) || isa<InvokeInst>(I)) && !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) { Candidates.push_back(&I); + if (FS->getEntrySamples() > 0) + localNotInlinedCallSites.try_emplace(&I, FS); if (callsiteIsHot(FS, PSI)) Hot = true; } @@ -823,6 +835,9 @@ bool SampleProfileLoader::inlineHotFunctions( if (CalleeFunctionName == F.getName()) continue; + if (!callsiteIsHot(FS, PSI)) + continue; + const char *Reason = "Callee function not available"; auto R = SymbolMap.find(CalleeFunctionName); if (R != SymbolMap.end() && R->getValue() && @@ -836,8 +851,10 @@ bool SampleProfileLoader::inlineHotFunctions( PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. 
 if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) && - inlineCallInstruction(DI)) + inlineCallInstruction(DI)) { + localNotInlinedCallSites.erase(I); LocalChanged = true; + } } else { LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to " @@ -846,8 +863,10 @@ bool SampleProfileLoader::inlineHotFunctions( } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(I)) + if (inlineCallInstruction(I)) { + localNotInlinedCallSites.erase(I); LocalChanged = true; + } } else if (IsThinLTOPreLink) { findCalleeFunctionSamples(*I)->findInlinedFunctions( InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); @@ -859,6 +878,18 @@ bool SampleProfileLoader::inlineHotFunctions( break; } } + + // Accumulate not inlined callsite information into notInlinedCallInfo + for (const auto &Pair : localNotInlinedCallSites) { + Instruction *I = Pair.getFirst(); + Function *Callee = CallSite(I).getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + continue; + const FunctionSamples *FS = Pair.getSecond(); + auto pair = + notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); + pair.first->second.entryCount += FS->getEntrySamples(); + } return Changed; } @@ -1299,10 +1330,10 @@ void SampleProfileLoader::propagateWeights(Function &F) { annotateValueSite(*I.getParent()->getParent()->getParent(), I, SortedCallTargets, Sum, IPVK_IndirectCallTarget, SortedCallTargets.size()); - } else if (!dyn_cast<IntrinsicInst>(&I)) { - SmallVector<uint32_t, 1> Weights; - Weights.push_back(BlockWeights[BB]); - I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + } else if (!isa<IntrinsicInst>(&I)) { + I.setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights( + {static_cast<uint32_t>(BlockWeights[BB])})); } } } @@ -1568,8 +1599,9 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, return false; PSI = _PSI; - if (M.getProfileSummary() == nullptr) - 
 M.setProfileSummary(Reader->getSummary().getMD(M.getContext())); + if (M.getProfileSummary(/* IsCS */ false) == nullptr) + M.setProfileSummary(Reader->getSummary().getMD(M.getContext()), + ProfileSummary::PSK_Sample); // Compute the total number of samples collected in this profile. for (const auto &I : Reader->getProfiles()) @@ -1601,6 +1633,12 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, clearFunctionData(); retval |= runOnFunction(F, AM); } + + // Account for cold calls not inlined: pass each callee's accumulated entry count to updateProfileCallee. + for (const std::pair<Function *, NotInlinedProfileInfo> &pair : + notInlinedCallInfo) + updateProfileCallee(pair.first, pair.second.entryCount); + return retval; } |