summary | refs | log | tree | commit | diff
path: root/llvm/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r-- llvm/lib/Transforms/IPO/SampleProfile.cpp | 168
1 files changed, 149 insertions, 19 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 6184681db8a2..a1fbb1adc412 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -26,13 +26,17 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -57,6 +61,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
@@ -90,6 +95,12 @@ using namespace llvm;
using namespace sampleprof;
using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "sample-profile"
+#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
+
+STATISTIC(NumCSInlined,
+ "Number of functions inlined with context sensitive profile");
+STATISTIC(NumCSNotInlined,
+ "Number of functions not inlined with context sensitive profile");
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
@@ -136,6 +147,25 @@ static cl::opt<bool> ProfileAccurateForSymsInList(
cl::desc("For symbols in profile symbol list, regard their profiles to "
"be accurate. It may be overriden by profile-sample-accurate. "));
+// When the loader declines to inline a call site that has an inlinee profile,
+// merge that inlinee's samples into the callee's outlined profile rather than
+// only accumulating the inlinee's entry count. Hidden; off by default.
+static cl::opt<bool> ProfileMergeInlinee(
+ "sample-profile-merge-inlinee", cl::Hidden, cl::init(false),
+ cl::desc("Merge past inlinee's profile to outline version if sample "
+ "profile loader decided not to inline a call site."));
+
+// Process functions in top-down call graph order instead of module order.
+// Only takes effect when a call graph is supplied to the loader. Hidden; off
+// by default.
+static cl::opt<bool> ProfileTopDownLoad(
+ "sample-profile-top-down-load", cl::Hidden, cl::init(false),
+ cl::desc("Do profile annotation and inlining for functions in top-down "
+ "order of call graph during sample profile loading."));
+
+// Allow call sites that are not hot to be considered for inlining when their
+// inline cost is small enough (see the threshold option below). Hidden; off
+// by default.
+static cl::opt<bool> ProfileSizeInline(
+ "sample-profile-inline-size", cl::Hidden, cl::init(false),
+ cl::desc("Inline cold call sites in profile loader if it's beneficial "
+ "for code size."));
+
+// Inclusive upper bound on the inline cost of a cold call site for it to be
+// inlined when size-based inlining is enabled. Hidden; defaults to 45.
+static cl::opt<int> SampleColdCallSiteThreshold(
+ "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
+ cl::desc("Threshold for inlining cold callsites"));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -285,7 +315,7 @@ public:
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
- ProfileSummaryInfo *_PSI);
+ ProfileSummaryInfo *_PSI, CallGraph *CG);
void dump() { Reader->dump(); }
@@ -305,6 +335,10 @@ protected:
bool inlineCallInstruction(Instruction *I);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ // Inline cold/small functions in addition to hot ones
+ bool shouldInlineColdCallee(Instruction &CallInst);
+ void emitOptimizationRemarksForInlineCandidates(
+ const SmallVector<Instruction *, 10> &Candidates, const Function &F, bool Hot);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -317,6 +351,7 @@ protected:
void propagateWeights(Function &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
+ std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
void clearFunctionData();
@@ -869,21 +904,52 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
None, nullptr, nullptr);
if (Cost.isNever()) {
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
<< "incompatible inlining");
return false;
}
InlineFunctionInfo IFI(nullptr, &GetAC);
if (InlineFunction(CS, IFI)) {
// The call to InlineFunction erases I, so we can't pass it here.
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
- << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
+ ORE->emit(OptimizationRemark(CSINLINE_DEBUG, "InlineSuccess", DLoc, BB)
+ << "inlined callee '" << ore::NV("Callee", CalledFunction)
<< "' into '" << ore::NV("Caller", BB->getParent()) << "'");
return true;
}
return false;
}
+/// Decide whether a call site that is not hot should still be inlined because
+/// doing so is cheap enough to benefit code size.
+///
+/// \param CallInst the call or invoke instruction under consideration.
+/// \returns false when size-based inlining is disabled
+/// (-sample-profile-inline-size) or the call has no statically known callee;
+/// otherwise true iff the computed inline cost permits inlining within
+/// -sample-profile-cold-inline-threshold.
+bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) {
+  if (!ProfileSizeInline)
+    return false;
+
+  Function *Callee = CallSite(&CallInst).getCalledFunction();
+  if (Callee == nullptr)
+    return false;
+
+  InlineCost Cost =
+      getInlineCost(cast<CallBase>(CallInst), getInlineParams(),
+                    GetTTI(*Callee), GetAC, None, nullptr, nullptr);
+
+  // InlineCost::getCost() is only valid for a variable cost and asserts
+  // otherwise, so resolve the fixed "never" / "always" verdicts first.
+  if (Cost.isNever())
+    return false;
+  if (Cost.isAlways())
+    return true;
+
+  return Cost.getCost() <= SampleColdCallSiteThreshold;
+}
+
+/// Emit one "InlineAttempt" analysis remark per inline candidate.
+///
+/// \param Candidates call instructions about to be considered for inlining.
+/// \param F the caller that contains the candidates.
+/// \param Hot selects the wording of the remark: "hotness" when true, "size"
+/// otherwise.
+///
+/// Candidates without a statically known callee are silently skipped.
+void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
+    const SmallVector<Instruction *, 10> &Candidates, const Function &F,
+    bool Hot) {
+  // The reason text is the same for every candidate, so pick it once.
+  const char *Reason = Hot ? "hotness: '" : "size: '";
+
+  for (Instruction *Candidate : Candidates) {
+    Function *Callee = CallSite(Candidate).getCalledFunction();
+    if (!Callee)
+      continue;
+
+    ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
+                                         Candidate->getDebugLoc(),
+                                         Candidate->getParent())
+              << "previous inlining reattempted for " << Reason
+              << ore::NV("Callee", Callee) << "' into '"
+              << ore::NV("Caller", &F) << "'");
+  }
+}
+
/// Iteratively inline hot callsites of a function.
///
/// Iteratively traverse all callsites of the function \p F, and find if
@@ -916,20 +982,28 @@ bool SampleProfileLoader::inlineHotFunctions(
SmallVector<Instruction *, 10> CIS;
for (auto &BB : F) {
bool Hot = false;
- SmallVector<Instruction *, 10> Candidates;
+ SmallVector<Instruction *, 10> AllCandidates;
+ SmallVector<Instruction *, 10> ColdCandidates;
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
- Candidates.push_back(&I);
+ AllCandidates.push_back(&I);
if (FS->getEntrySamples() > 0)
localNotInlinedCallSites.try_emplace(&I, FS);
if (callsiteIsHot(FS, PSI))
Hot = true;
+ else if (shouldInlineColdCallee(I))
+ ColdCandidates.push_back(&I);
}
}
if (Hot) {
- CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
+ CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
+ emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
+ }
+ else {
+ CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
+ emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
}
}
for (auto I : CIS) {
@@ -975,6 +1049,7 @@ bool SampleProfileLoader::inlineHotFunctions(
inlineCallInstruction(DI)) {
localNotInlinedCallSites.erase(I);
LocalChanged = true;
+ ++NumCSInlined;
}
} else {
LLVM_DEBUG(dbgs()
@@ -987,6 +1062,7 @@ bool SampleProfileLoader::inlineHotFunctions(
if (inlineCallInstruction(I)) {
localNotInlinedCallSites.erase(I);
LocalChanged = true;
+ ++NumCSInlined;
}
} else if (IsThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(
@@ -1006,10 +1082,35 @@ bool SampleProfileLoader::inlineHotFunctions(
Function *Callee = CallSite(I).getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;
+
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
+ I->getDebugLoc(), I->getParent())
+ << "previous inlining not repeated: '"
+ << ore::NV("Callee", Callee) << "' into '"
+ << ore::NV("Caller", &F) << "'");
+
+ ++NumCSNotInlined;
const FunctionSamples *FS = Pair.getSecond();
- auto pair =
- notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
- pair.first->second.entryCount += FS->getEntrySamples();
+ if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
+ continue;
+ }
+
+ if (ProfileMergeInlinee) {
+ // Use entry samples as head samples during the merge, as inlinees
+ // don't have head samples.
+ assert(FS->getHeadSamples() == 0 && "Expect 0 head sample for inlinee");
+ const_cast<FunctionSamples *>(FS)->addHeadSamples(FS->getEntrySamples());
+
+ // Note that we have to do the merge right after processing function.
+ // This allows OutlineFS's profile to be used for annotation during
+ // top-down processing of functions' annotation.
+ FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
+ OutlineFS->merge(*FS);
+ } else {
+ auto pair =
+ notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
+ pair.first->second.entryCount += FS->getEntrySamples();
+ }
}
return Changed;
}
@@ -1673,6 +1774,33 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
+/// Compute the order in which the defined functions of \p M are processed.
+///
+/// \param M the module being annotated.
+/// \param CG the module's call graph, or nullptr if none is available.
+/// \returns every function of \p M that is not a declaration. When
+/// -sample-profile-top-down-load is set and \p CG is non-null, the functions
+/// are listed in the reverse of the call graph SCC traversal order; otherwise
+/// they are listed in module order.
+std::vector<Function *>
+SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
+  std::vector<Function *> Order;
+  Order.reserve(M.size());
+
+  const bool UseCallGraphOrder = ProfileTopDownLoad && CG != nullptr;
+  if (UseCallGraphOrder) {
+    assert(&CG->getModule() == &M);
+
+    // Collect functions SCC by SCC, then flip the whole list so that the
+    // traversal order is reversed (top-down, per the option's description).
+    for (scc_iterator<CallGraph *> SCC = scc_begin(CG); !SCC.isAtEnd();
+         ++SCC) {
+      for (CallGraphNode *Node : *SCC) {
+        Function *Fn = Node->getFunction();
+        if (Fn && !Fn->isDeclaration())
+          Order.push_back(Fn);
+      }
+    }
+
+    std::reverse(Order.begin(), Order.end());
+    return Order;
+  }
+
+  // Fallback: plain module order, definitions only.
+  for (Function &Fn : M) {
+    if (Fn.isDeclaration())
+      continue;
+    Order.push_back(&Fn);
+  }
+  return Order;
+}
+
bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
@@ -1710,7 +1838,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
}
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
- ProfileSummaryInfo *_PSI) {
+ ProfileSummaryInfo *_PSI, CallGraph *CG) {
GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
if (!ProfileIsValid)
return false;
@@ -1745,11 +1873,11 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
}
bool retval = false;
- for (auto &F : M)
- if (!F.isDeclaration()) {
- clearFunctionData();
- retval |= runOnFunction(F, AM);
- }
+ for (auto F : buildFunctionOrder(M, CG)) {
+ assert(!F->isDeclaration());
+ clearFunctionData();
+ retval |= runOnFunction(*F, AM);
+ }
// Account for cold calls not inlined....
for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
@@ -1764,7 +1892,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- return SampleLoader.runOnModule(M, nullptr, PSI);
+ return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
@@ -1845,10 +1973,12 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
: ProfileRemappingFileName,
IsThinLTOPreLink, GetAssumptionCache, GetTTI);
- SampleLoader.doInitialization(M);
+ if (!SampleLoader.doInitialization(M))
+ return PreservedAnalyses::all();
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- if (!SampleLoader.runOnModule(M, &AM, PSI))
+ CallGraph &CG = AM.getResult<CallGraphAnalysis>(M);
+ if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
return PreservedAnalyses::all();
return PreservedAnalyses::none();