aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp186
1 files changed, 104 insertions, 82 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index a1fbb1adc412..b6871e260532 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -37,15 +37,16 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -148,14 +149,17 @@ static cl::opt<bool> ProfileAccurateForSymsInList(
"be accurate. It may be overriden by profile-sample-accurate. "));
static cl::opt<bool> ProfileMergeInlinee(
- "sample-profile-merge-inlinee", cl::Hidden, cl::init(false),
+ "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
cl::desc("Merge past inlinee's profile to outline version if sample "
- "profile loader decided not to inline a call site."));
+ "profile loader decided not to inline a call site. It will "
+ "only be enabled when top-down order of profile loading is "
+ "enabled. "));
static cl::opt<bool> ProfileTopDownLoad(
- "sample-profile-top-down-load", cl::Hidden, cl::init(false),
+ "sample-profile-top-down-load", cl::Hidden, cl::init(true),
cl::desc("Do profile annotation and inlining for functions in top-down "
- "order of call graph during sample profile loading."));
+ "order of call graph during sample profile loading. It only "
+ "works for new pass manager. "));
static cl::opt<bool> ProfileSizeInline(
"sample-profile-inline-size", cl::Hidden, cl::init(false),
@@ -235,7 +239,7 @@ public:
DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
: CurrentReader(Reader), CurrentModule(M),
CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
- if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ if (!CurrentReader.useMD5())
return;
for (const auto &F : CurrentModule) {
@@ -261,7 +265,7 @@ public:
}
~GUIDToFuncNameMapper() {
- if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ if (!CurrentReader.useMD5())
return;
CurrentGUIDToFuncNameMap.clear();
@@ -307,10 +311,12 @@ public:
SampleProfileLoader(
StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
- std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
+ std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI)
: GetAC(std::move(GetAssumptionCache)),
- GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this),
- Filename(Name), RemappingFilename(RemapName),
+ GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
+ CoverageTracker(*this), Filename(std::string(Name)),
+ RemappingFilename(std::string(RemapName)),
IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
@@ -327,18 +333,19 @@ protected:
bool emitAnnotations(Function &F);
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
- const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
+ const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- bool inlineCallInstruction(Instruction *I);
+ bool inlineCallInstruction(CallBase &CB);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
// Inline cold/small functions in addition to hot ones
- bool shouldInlineColdCallee(Instruction &CallInst);
+ bool shouldInlineColdCallee(CallBase &CallInst);
void emitOptimizationRemarksForInlineCandidates(
- const SmallVector<Instruction *, 10> &Candidates, const Function &F, bool Hot);
+ const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
+ bool Hot);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -397,6 +404,7 @@ protected:
std::function<AssumptionCache &(Function &)> GetAC;
std::function<TargetTransformInfo &(Function &)> GetTTI;
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI;
/// Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@@ -474,14 +482,17 @@ public:
SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
bool IsThinLTOPreLink = false)
- : ModulePass(ID),
- SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
- [&](Function &F) -> AssumptionCache & {
- return ACT->getAssumptionCache(F);
- },
- [&](Function &F) -> TargetTransformInfo & {
- return TTIWP->getTTI(F);
- }) {
+ : ModulePass(ID), SampleLoader(
+ Name, SampleProfileRemappingFile, IsThinLTOPreLink,
+ [&](Function &F) -> AssumptionCache & {
+ return ACT->getAssumptionCache(F);
+ },
+ [&](Function &F) -> TargetTransformInfo & {
+ return TTIWP->getTTI(F);
+ },
+ [&](Function &F) -> TargetLibraryInfo & {
+ return TLIWP->getTLI(F);
+ }) {
initializeSampleProfileLoaderLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -498,6 +509,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
@@ -505,6 +517,7 @@ private:
SampleProfileLoader SampleLoader;
AssumptionCacheTracker *ACT = nullptr;
TargetTransformInfoWrapperPass *TTIWP = nullptr;
+ TargetLibraryInfoWrapperPass *TLIWP = nullptr;
};
} // end anonymous namespace
@@ -710,10 +723,9 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// (findCalleeFunctionSamples returns non-empty result), but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
- if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
- !ImmutableCallSite(&Inst).isIndirectCall() &&
- findCalleeFunctionSamples(Inst))
- return 0;
+ if (auto *CB = dyn_cast<CallBase>(&Inst))
+ if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
+ return 0;
const DILocation *DIL = DLoc;
uint32_t LineOffset = FunctionSamples::getOffset(DIL);
@@ -801,7 +813,7 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
///
/// \returns The FunctionSamples pointer to the inlined instance.
const FunctionSamples *
-SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
+SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL) {
return nullptr;
@@ -885,13 +897,11 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return it.first->second;
}
-bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
- assert(isa<CallInst>(I) || isa<InvokeInst>(I));
- CallSite CS(I);
- Function *CalledFunction = CS.getCalledFunction();
+bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
+ Function *CalledFunction = CB.getCalledFunction();
assert(CalledFunction);
- DebugLoc DLoc = I->getDebugLoc();
- BasicBlock *BB = I->getParent();
+ DebugLoc DLoc = CB.getDebugLoc();
+ BasicBlock *BB = CB.getParent();
InlineParams Params = getInlineParams();
Params.ComputeFullInlineCost = true;
// Checks if there is anything in the reachable portion of the callee at
@@ -901,46 +911,43 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
// The acutal cost does not matter because we only checks isNever() to
// see if it is legal to inline the callsite.
InlineCost Cost =
- getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
- None, nullptr, nullptr);
+ getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI);
if (Cost.isNever()) {
ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
<< "incompatible inlining");
return false;
}
- InlineFunctionInfo IFI(nullptr, &GetAC);
- if (InlineFunction(CS, IFI)) {
+ InlineFunctionInfo IFI(nullptr, GetAC);
+ if (InlineFunction(CB, IFI).isSuccess()) {
// The call to InlineFunction erases I, so we can't pass it here.
- ORE->emit(OptimizationRemark(CSINLINE_DEBUG, "InlineSuccess", DLoc, BB)
- << "inlined callee '" << ore::NV("Callee", CalledFunction)
- << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
+ emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
+ true, CSINLINE_DEBUG);
return true;
}
return false;
}
-bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) {
+bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
if (!ProfileSizeInline)
return false;
- Function *Callee = CallSite(&CallInst).getCalledFunction();
+ Function *Callee = CallInst.getCalledFunction();
if (Callee == nullptr)
return false;
- InlineCost Cost =
- getInlineCost(cast<CallBase>(CallInst), getInlineParams(),
- GetTTI(*Callee), GetAC, None, nullptr, nullptr);
+ InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
+ GetAC, GetTLI);
return Cost.getCost() <= SampleColdCallSiteThreshold;
}
void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
- const SmallVector<Instruction *, 10> &Candidates, const Function &F,
+ const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
bool Hot) {
for (auto I : Candidates) {
- Function *CalledFunction = CallSite(I).getCalledFunction();
+ Function *CalledFunction = I->getCalledFunction();
if (CalledFunction) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
I->getDebugLoc(), I->getParent())
<< "previous inlining reattempted for "
<< (Hot ? "hotness: '" : "size: '")
@@ -975,43 +982,43 @@ bool SampleProfileLoader::inlineHotFunctions(
"ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled");
- DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
+ DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
bool Changed = false;
while (true) {
bool LocalChanged = false;
- SmallVector<Instruction *, 10> CIS;
+ SmallVector<CallBase *, 10> CIS;
for (auto &BB : F) {
bool Hot = false;
- SmallVector<Instruction *, 10> AllCandidates;
- SmallVector<Instruction *, 10> ColdCandidates;
+ SmallVector<CallBase *, 10> AllCandidates;
+ SmallVector<CallBase *, 10> ColdCandidates;
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
- if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
- !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
- AllCandidates.push_back(&I);
- if (FS->getEntrySamples() > 0)
- localNotInlinedCallSites.try_emplace(&I, FS);
- if (callsiteIsHot(FS, PSI))
- Hot = true;
- else if (shouldInlineColdCallee(I))
- ColdCandidates.push_back(&I);
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
+ AllCandidates.push_back(CB);
+ if (FS->getEntrySamples() > 0)
+ localNotInlinedCallSites.try_emplace(CB, FS);
+ if (callsiteIsHot(FS, PSI))
+ Hot = true;
+ else if (shouldInlineColdCallee(*CB))
+ ColdCandidates.push_back(CB);
+ }
}
}
if (Hot) {
CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
- }
- else {
+ } else {
CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
}
}
- for (auto I : CIS) {
- Function *CalledFunction = CallSite(I).getCalledFunction();
+ for (CallBase *I : CIS) {
+ Function *CalledFunction = I->getCalledFunction();
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
- if (CallSite(I).isIndirectCall()) {
+ if (I->isIndirectCall()) {
if (PromotedInsns.count(I))
continue;
uint64_t Sum;
@@ -1021,7 +1028,7 @@ bool SampleProfileLoader::inlineHotFunctions(
PSI->getOrCompHotCountThreshold());
continue;
}
- auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
+ auto CalleeFunctionName = FS->getFuncName();
// If it is a recursive call, we do not inline it as it could bloat
// the code exponentially. There is way to better handle this, e.g.
// clone the caller first, and inline the cloned caller if it is
@@ -1038,15 +1045,16 @@ bool SampleProfileLoader::inlineHotFunctions(
if (R != SymbolMap.end() && R->getValue() &&
!R->getValue()->isDeclaration() &&
R->getValue()->getSubprogram() &&
- isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
+ R->getValue()->hasFnAttribute("use-sample-profile") &&
+ isLegalToPromote(*I, R->getValue(), &Reason)) {
uint64_t C = FS->getEntrySamples();
- Instruction *DI =
- pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
+ auto &DI =
+ pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE);
Sum -= C;
PromotedInsns.insert(I);
// If profile mismatches, we should not attempt to inline DI.
if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
- inlineCallInstruction(DI)) {
+ inlineCallInstruction(cast<CallBase>(DI))) {
localNotInlinedCallSites.erase(I);
LocalChanged = true;
++NumCSInlined;
@@ -1059,7 +1067,7 @@ bool SampleProfileLoader::inlineHotFunctions(
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {
- if (inlineCallInstruction(I)) {
+ if (inlineCallInstruction(*I)) {
localNotInlinedCallSites.erase(I);
LocalChanged = true;
++NumCSInlined;
@@ -1078,8 +1086,8 @@ bool SampleProfileLoader::inlineHotFunctions(
// Accumulate not inlined callsite information into notInlinedSamples
for (const auto &Pair : localNotInlinedCallSites) {
- Instruction *I = Pair.getFirst();
- Function *Callee = CallSite(I).getCalledFunction();
+ CallBase *I = Pair.getFirst();
+ Function *Callee = I->getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;
@@ -1525,8 +1533,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
for (auto &I : BB->getInstList()) {
if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
continue;
- CallSite CS(&I);
- if (!CS.getCalledFunction()) {
+ if (!cast<CallBase>(I).getCalledFunction()) {
const DebugLoc &DLoc = I.getDebugLoc();
if (!DLoc)
continue;
@@ -1770,6 +1777,7 @@ INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
@@ -1780,8 +1788,17 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
FunctionOrderList.reserve(M.size());
if (!ProfileTopDownLoad || CG == nullptr) {
+ if (ProfileMergeInlinee) {
+ // Disable ProfileMergeInlinee if profile is not loaded in top down order,
+ // because the profile for a function may be used for the profile
+ // annotation of its outline copy before the profile merging of its
+ // non-inlined inline instances, and that is not the way how
+ // ProfileMergeInlinee is supposed to work.
+ ProfileMergeInlinee = false;
+ }
+
for (Function &F : M)
- if (!F.isDeclaration())
+ if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
FunctionOrderList.push_back(&F);
return FunctionOrderList;
}
@@ -1791,7 +1808,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
while (!CGI.isAtEnd()) {
for (CallGraphNode *node : *CGI) {
auto F = node->getFunction();
- if (F && !F->isDeclaration())
+ if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
FunctionOrderList.push_back(F);
}
++CGI;
@@ -1839,15 +1856,16 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI, CallGraph *CG) {
- GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
if (!ProfileIsValid)
return false;
+ GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
PSI = _PSI;
- if (M.getProfileSummary(/* IsCS */ false) == nullptr)
+ if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
ProfileSummary::PSK_Sample);
-
+ PSI->refresh();
+ }
// Compute the total number of samples collected in this profile.
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();
@@ -1890,6 +1908,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
ACT = &getAnalysis<AssumptionCacheTracker>();
TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
+ TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
@@ -1966,12 +1985,15 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
+ auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
SampleProfileLoader SampleLoader(
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
: ProfileRemappingFileName,
- IsThinLTOPreLink, GetAssumptionCache, GetTTI);
+ IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI);
if (!SampleLoader.doInitialization(M))
return PreservedAnalyses::all();