diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Transforms/IPO/SampleProfile.cpp | |
parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) |
Notes
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfile.cpp | 186 |
1 files changed, 104 insertions, 82 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index a1fbb1adc412..b6871e260532 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -37,15 +37,16 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" @@ -148,14 +149,17 @@ static cl::opt<bool> ProfileAccurateForSymsInList( "be accurate. It may be overriden by profile-sample-accurate. ")); static cl::opt<bool> ProfileMergeInlinee( - "sample-profile-merge-inlinee", cl::Hidden, cl::init(false), + "sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " - "profile loader decided not to inline a call site.")); + "profile loader decided not to inline a call site. It will " + "only be enabled when top-down order of profile loading is " + "enabled. ")); static cl::opt<bool> ProfileTopDownLoad( - "sample-profile-top-down-load", cl::Hidden, cl::init(false), + "sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " - "order of call graph during sample profile loading.")); + "order of call graph during sample profile loading. It only " + "works for new pass manager. ")); static cl::opt<bool> ProfileSizeInline( "sample-profile-inline-size", cl::Hidden, cl::init(false), @@ -235,7 +239,7 @@ public: DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap) : CurrentReader(Reader), CurrentModule(M), CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { - if (CurrentReader.getFormat() != SPF_Compact_Binary) + if (!CurrentReader.useMD5()) return; for (const auto &F : CurrentModule) { @@ -261,7 +265,7 @@ public: } ~GUIDToFuncNameMapper() { - if (CurrentReader.getFormat() != SPF_Compact_Binary) + if (!CurrentReader.useMD5()) return; CurrentGUIDToFuncNameMap.clear(); @@ -307,10 +311,12 @@ public: SampleProfileLoader( StringRef Name, StringRef RemapName, bool IsThinLTOPreLink, std::function<AssumptionCache &(Function &)> GetAssumptionCache, - std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo) + std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo, + std::function<const TargetLibraryInfo &(Function &)> GetTLI) : GetAC(std::move(GetAssumptionCache)), - GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this), - Filename(Name), RemappingFilename(RemapName), + GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), + CoverageTracker(*this), Filename(std::string(Name)), + RemappingFilename(std::string(RemapName)), IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); @@ -327,18 +333,19 @@ protected: bool emitAnnotations(Function &F); ErrorOr<uint64_t> getInstWeight(const Instruction &I); ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB); - const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; + const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const; std::vector<const FunctionSamples *> findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineCallInstruction(Instruction *I); + bool inlineCallInstruction(CallBase &CB); bool inlineHotFunctions(Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs); // Inline cold/small functions in addition to hot ones - bool shouldInlineColdCallee(Instruction &CallInst); + bool shouldInlineColdCallee(CallBase &CallInst); void emitOptimizationRemarksForInlineCandidates( - const SmallVector<Instruction *, 10> &Candidates, const Function &F, bool Hot); + const SmallVectorImpl<CallBase *> &Candidates, const Function &F, + bool Hot); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -397,6 +404,7 @@ protected: std::function<AssumptionCache &(Function &)> GetAC; std::function<TargetTransformInfo &(Function &)> GetTTI; + std::function<const TargetLibraryInfo &(Function &)> GetTLI; /// Predecessors for each basic block in the CFG. BlockEdgeMap Predecessors; @@ -474,14 +482,17 @@ public: SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile, bool IsThinLTOPreLink = false) - : ModulePass(ID), - SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink, - [&](Function &F) -> AssumptionCache & { - return ACT->getAssumptionCache(F); - }, - [&](Function &F) -> TargetTransformInfo & { - return TTIWP->getTTI(F); - }) { + : ModulePass(ID), SampleLoader( + Name, SampleProfileRemappingFile, IsThinLTOPreLink, + [&](Function &F) -> AssumptionCache & { + return ACT->getAssumptionCache(F); + }, + [&](Function &F) -> TargetTransformInfo & { + return TTIWP->getTTI(F); + }, + [&](Function &F) -> TargetLibraryInfo & { + return TLIWP->getTLI(F); + }) { initializeSampleProfileLoaderLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -498,6 +509,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); } @@ -505,6 +517,7 @@ private: SampleProfileLoader SampleLoader; AssumptionCacheTracker *ACT = nullptr; TargetTransformInfoWrapperPass *TTIWP = nullptr; + TargetLibraryInfoWrapperPass *TLIWP = nullptr; }; } // end anonymous namespace @@ -710,10 +723,9 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { // (findCalleeFunctionSamples returns non-empty result), but not inlined here, // it means that the inlined callsite has no sample, thus the call // instruction should have 0 count. - if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) && - !ImmutableCallSite(&Inst).isIndirectCall() && - findCalleeFunctionSamples(Inst)) - return 0; + if (auto *CB = dyn_cast<CallBase>(&Inst)) + if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) + return 0; const DILocation *DIL = DLoc; uint32_t LineOffset = FunctionSamples::getOffset(DIL); @@ -801,7 +813,7 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) { /// /// \returns The FunctionSamples pointer to the inlined instance. const FunctionSamples * -SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { +SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const { const DILocation *DIL = Inst.getDebugLoc(); if (!DIL) { return nullptr; @@ -885,13 +897,11 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return it.first->second; } -bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { - assert(isa<CallInst>(I) || isa<InvokeInst>(I)); - CallSite CS(I); - Function *CalledFunction = CS.getCalledFunction(); +bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { + Function *CalledFunction = CB.getCalledFunction(); assert(CalledFunction); - DebugLoc DLoc = I->getDebugLoc(); - BasicBlock *BB = I->getParent(); + DebugLoc DLoc = CB.getDebugLoc(); + BasicBlock *BB = CB.getParent(); InlineParams Params = getInlineParams(); Params.ComputeFullInlineCost = true; // Checks if there is anything in the reachable portion of the callee at @@ -901,46 +911,43 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { // The acutal cost does not matter because we only checks isNever() to // see if it is legal to inline the callsite. InlineCost Cost = - getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC, - None, nullptr, nullptr); + getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI); if (Cost.isNever()) { ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) << "incompatible inlining"); return false; } - InlineFunctionInfo IFI(nullptr, &GetAC); - if (InlineFunction(CS, IFI)) { + InlineFunctionInfo IFI(nullptr, GetAC); + if (InlineFunction(CB, IFI).isSuccess()) { // The call to InlineFunction erases I, so we can't pass it here. - ORE->emit(OptimizationRemark(CSINLINE_DEBUG, "InlineSuccess", DLoc, BB) - << "inlined callee '" << ore::NV("Callee", CalledFunction) - << "' into '" << ore::NV("Caller", BB->getParent()) << "'"); + emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, + true, CSINLINE_DEBUG); return true; } return false; } -bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) { +bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) { if (!ProfileSizeInline) return false; - Function *Callee = CallSite(&CallInst).getCalledFunction(); + Function *Callee = CallInst.getCalledFunction(); if (Callee == nullptr) return false; - InlineCost Cost = - getInlineCost(cast<CallBase>(CallInst), getInlineParams(), - GetTTI(*Callee), GetAC, None, nullptr, nullptr); + InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee), + GetAC, GetTLI); return Cost.getCost() <= SampleColdCallSiteThreshold; } void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates( - const SmallVector<Instruction *, 10> &Candidates, const Function &F, + const SmallVectorImpl<CallBase *> &Candidates, const Function &F, bool Hot) { for (auto I : Candidates) { - Function *CalledFunction = CallSite(I).getCalledFunction(); + Function *CalledFunction = I->getCalledFunction(); if (CalledFunction) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt", + ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt", I->getDebugLoc(), I->getParent()) << "previous inlining reattempted for " << (Hot ? "hotness: '" : "size: '") @@ -975,43 +982,43 @@ bool SampleProfileLoader::inlineHotFunctions( "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled"); - DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites; + DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites; bool Changed = false; while (true) { bool LocalChanged = false; - SmallVector<Instruction *, 10> CIS; + SmallVector<CallBase *, 10> CIS; for (auto &BB : F) { bool Hot = false; - SmallVector<Instruction *, 10> AllCandidates; - SmallVector<Instruction *, 10> ColdCandidates; + SmallVector<CallBase *, 10> AllCandidates; + SmallVector<CallBase *, 10> ColdCandidates; for (auto &I : BB.getInstList()) { const FunctionSamples *FS = nullptr; - if ((isa<CallInst>(I) || isa<InvokeInst>(I)) && - !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) { - AllCandidates.push_back(&I); - if (FS->getEntrySamples() > 0) - localNotInlinedCallSites.try_emplace(&I, FS); - if (callsiteIsHot(FS, PSI)) - Hot = true; - else if (shouldInlineColdCallee(I)) - ColdCandidates.push_back(&I); + if (auto *CB = dyn_cast<CallBase>(&I)) { + if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) { + AllCandidates.push_back(CB); + if (FS->getEntrySamples() > 0) + localNotInlinedCallSites.try_emplace(CB, FS); + if (callsiteIsHot(FS, PSI)) + Hot = true; + else if (shouldInlineColdCallee(*CB)) + ColdCandidates.push_back(CB); + } } } if (Hot) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); - } - else { + } else { CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end()); emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false); } } - for (auto I : CIS) { - Function *CalledFunction = CallSite(I).getCalledFunction(); + for (CallBase *I : CIS) { + Function *CalledFunction = I->getCalledFunction(); // Do not inline recursive calls. if (CalledFunction == &F) continue; - if (CallSite(I).isIndirectCall()) { + if (I->isIndirectCall()) { if (PromotedInsns.count(I)) continue; uint64_t Sum; @@ -1021,7 +1028,7 @@ bool SampleProfileLoader::inlineHotFunctions( PSI->getOrCompHotCountThreshold()); continue; } - auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent()); + auto CalleeFunctionName = FS->getFuncName(); // If it is a recursive call, we do not inline it as it could bloat // the code exponentially. There is way to better handle this, e.g. // clone the caller first, and inline the cloned caller if it is @@ -1038,15 +1045,16 @@ bool SampleProfileLoader::inlineHotFunctions( if (R != SymbolMap.end() && R->getValue() && !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && - isLegalToPromote(CallSite(I), R->getValue(), &Reason)) { + R->getValue()->hasFnAttribute("use-sample-profile") && + isLegalToPromote(*I, R->getValue(), &Reason)) { uint64_t C = FS->getEntrySamples(); - Instruction *DI = - pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE); + auto &DI = + pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE); Sum -= C; PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) && - inlineCallInstruction(DI)) { + inlineCallInstruction(cast<CallBase>(DI))) { localNotInlinedCallSites.erase(I); LocalChanged = true; ++NumCSInlined; @@ -1059,7 +1067,7 @@ bool SampleProfileLoader::inlineHotFunctions( } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(I)) { + if (inlineCallInstruction(*I)) { localNotInlinedCallSites.erase(I); LocalChanged = true; ++NumCSInlined; @@ -1078,8 +1086,8 @@ bool SampleProfileLoader::inlineHotFunctions( // Accumulate not inlined callsite information into notInlinedSamples for (const auto &Pair : localNotInlinedCallSites) { - Instruction *I = Pair.getFirst(); - Function *Callee = CallSite(I).getCalledFunction(); + CallBase *I = Pair.getFirst(); + Function *Callee = I->getCalledFunction(); if (!Callee || Callee->isDeclaration()) continue; @@ -1525,8 +1533,7 @@ void SampleProfileLoader::propagateWeights(Function &F) { for (auto &I : BB->getInstList()) { if (!isa<CallInst>(I) && !isa<InvokeInst>(I)) continue; - CallSite CS(&I); - if (!CS.getCalledFunction()) { + if (!cast<CallBase>(I).getCalledFunction()) { const DebugLoc &DLoc = I.getDebugLoc(); if (!DLoc) continue; @@ -1770,6 +1777,7 @@ INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) @@ -1780,8 +1788,17 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { FunctionOrderList.reserve(M.size()); if (!ProfileTopDownLoad || CG == nullptr) { + if (ProfileMergeInlinee) { + // Disable ProfileMergeInlinee if profile is not loaded in top down order, + // because the profile for a function may be used for the profile + // annotation of its outline copy before the profile merging of its + // non-inlined inline instances, and that is not the way how + // ProfileMergeInlinee is supposed to work. + ProfileMergeInlinee = false; + } + for (Function &F : M) - if (!F.isDeclaration()) + if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile")) FunctionOrderList.push_back(&F); return FunctionOrderList; } @@ -1791,7 +1808,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { while (!CGI.isAtEnd()) { for (CallGraphNode *node : *CGI) { auto F = node->getFunction(); - if (F && !F->isDeclaration()) + if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) FunctionOrderList.push_back(F); } ++CGI; @@ -1839,15 +1856,16 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG) { - GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); if (!ProfileIsValid) return false; + GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); PSI = _PSI; - if (M.getProfileSummary(/* IsCS */ false) == nullptr) + if (M.getProfileSummary(/* IsCS */ false) == nullptr) { M.setProfileSummary(Reader->getSummary().getMD(M.getContext()), ProfileSummary::PSK_Sample); - + PSI->refresh(); + } // Compute the total number of samples collected in this profile. for (const auto &I : Reader->getProfiles()) TotalCollectedSamples += I.second.getTotalSamples(); @@ -1890,6 +1908,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { ACT = &getAnalysis<AssumptionCacheTracker>(); TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); + TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>(); ProfileSummaryInfo *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); return SampleLoader.runOnModule(M, nullptr, PSI, nullptr); @@ -1966,12 +1985,15 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, auto GetTTI = [&](Function &F) -> TargetTransformInfo & { return FAM.getResult<TargetIRAnalysis>(F); }; + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; SampleProfileLoader SampleLoader( ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, ProfileRemappingFileName.empty() ? SampleProfileRemappingFile : ProfileRemappingFileName, - IsThinLTOPreLink, GetAssumptionCache, GetTTI); + IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI); if (!SampleLoader.doInitialization(M)) return PreservedAnalyses::all(); |