diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp | 41 |
1 files changed, 37 insertions, 4 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp index a961c47a7501..b8fac9d47763 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -84,6 +84,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" #include <algorithm> @@ -173,6 +174,9 @@ static cl::opt<bool> cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on.")); +cl::opt<bool> + SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, + cl::desc("Sort profiled recursion by edge weights.")); static cl::opt<bool> ProfileSizeInline( "sample-profile-inline-size", cl::Hidden, cl::init(false), @@ -1648,6 +1652,19 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { SmallVector<uint32_t, 4> Weights; uint32_t MaxWeight = 0; Instruction *MaxDestInst; + // Since profi treats multiple edges (multiway branches) as a single edge, + // we need to distribute the computed weight among the branches. We do + // this by evenly splitting the edge weight among destinations. + DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity; + std::vector<uint64_t> EdgeIndex; + if (SampleProfileUseProfi) { + EdgeIndex.resize(TI->getNumSuccessors()); + for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { + const BasicBlock *Succ = TI->getSuccessor(I); + EdgeIndex[I] = EdgeMultiplicity[Succ]; + EdgeMultiplicity[Succ]++; + } + } for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { BasicBlock *Succ = TI->getSuccessor(I); Edge E = std::make_pair(BB, Succ); @@ -1660,9 +1677,19 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)"); Weight = std::numeric_limits<uint32_t>::max(); } - // Weight is added by one to avoid propagation errors introduced by - // 0 weights. - Weights.push_back(static_cast<uint32_t>(Weight + 1)); + if (!SampleProfileUseProfi) { + // Weight is added by one to avoid propagation errors introduced by + // 0 weights. + Weights.push_back(static_cast<uint32_t>(Weight + 1)); + } else { + // Profi creates proper weights that do not require "+1" adjustments but + // we evenly split the weight among branches with the same destination. + uint64_t W = Weight / EdgeMultiplicity[Succ]; + // Rounding up, if needed, so that first branches are hotter. + if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ]) + W++; + Weights.push_back(static_cast<uint32_t>(W)); + } if (Weight != 0) { if (Weight > MaxWeight) { MaxWeight = Weight; @@ -1853,7 +1880,13 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG); scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get()); while (!CGI.isAtEnd()) { - for (ProfiledCallGraphNode *Node : *CGI) { + auto Range = *CGI; + if (SortProfiledSCC) { + // Sort nodes in one SCC based on callsite hotness. + scc_member_iterator<ProfiledCallGraph *> SI(*CGI); + Range = *SI; + } + for (auto *Node : Range) { Function *F = SymbolMap.lookup(Node->Name); if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) FunctionOrderList.push_back(F); |
