diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:04 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:11 +0000 |
| commit | e3b557809604d036af6e00c60f012c2025b59a5e (patch) | |
| tree | 8a11ba2269a3b669601e2fd41145b174008f4da8 /llvm/lib/Transforms/IPO/SampleProfile.cpp | |
| parent | 08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff) | |
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
| -rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfile.cpp | 255 |
1 files changed, 219 insertions, 36 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index f76b886e810a..93b368fd72a6 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallVector.h" @@ -74,6 +75,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" #include <algorithm> @@ -127,6 +129,15 @@ static cl::opt<std::string> SampleProfileRemappingFile( "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden); +static cl::opt<bool> ReportProfileStaleness( + "report-profile-staleness", cl::Hidden, cl::init(false), + cl::desc("Compute and report stale profile statistical metrics.")); + +static cl::opt<bool> PersistProfileStaleness( + "persist-profile-staleness", cl::Hidden, cl::init(false), + cl::desc("Compute stale profile statistical metrics and write it into the " + "native object file(.llvm_stats section).")); + static cl::opt<bool> ProfileSampleAccurate( "profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " @@ -362,7 +373,7 @@ private: FS->GUIDToFuncNameMap = Map; for (const auto &ICS : FS->getCallsiteSamples()) { const FunctionSamplesMap &FSMap = ICS.second; - for (auto &IFS : FSMap) { + for (const auto &IFS : FSMap) { FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second); FSToUpdate.push(&FS); } @@ -412,6 +423,30 @@ using CandidateQueue = 
PriorityQueue<InlineCandidate, std::vector<InlineCandidate>, CandidateComparer>; +// Sample profile matching - fuzzy match. +class SampleProfileMatcher { + Module &M; + SampleProfileReader &Reader; + const PseudoProbeManager *ProbeManager; + + // Profile mismatching statistics. + uint64_t TotalProfiledCallsites = 0; + uint64_t NumMismatchedCallsites = 0; + uint64_t MismatchedCallsiteSamples = 0; + uint64_t TotalCallsiteSamples = 0; + uint64_t TotalProfiledFunc = 0; + uint64_t NumMismatchedFuncHash = 0; + uint64_t MismatchedFuncHashSamples = 0; + uint64_t TotalFuncHashSamples = 0; + +public: + SampleProfileMatcher(Module &M, SampleProfileReader &Reader, + const PseudoProbeManager *ProbeManager) + : M(M), Reader(Reader), ProbeManager(ProbeManager) {} + void detectProfileMismatch(); + void detectProfileMismatch(const Function &F, const FunctionSamples &FS); +}; + /// Sample profile pass. /// /// This pass reads profile data from the file specified by @@ -459,7 +494,7 @@ protected: bool inlineHotFunctions(Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs); - Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB); + std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB); bool getExternalInlineAdvisorShouldInline(CallBase &CB); InlineCost shouldInlineCandidate(InlineCandidate &Candidate); bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB); @@ -475,7 +510,7 @@ protected: const SmallVectorImpl<CallBase *> &Candidates, const Function &F, bool Hot); void promoteMergeNotInlinedContextSamples( - DenseMap<CallBase *, const FunctionSamples *> NonInlinedCallSites, + MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites, const Function &F); std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG); std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG); @@ -541,6 +576,9 @@ protected: // A pseudo probe helper to correlate the imported sample counts. 
std::unique_ptr<PseudoProbeManager> ProbeManager; + // A helper to implement the sample profile matching algorithm. + std::unique_ptr<SampleProfileMatcher> MatchingManager; + private: const char *getAnnotatedRemarkPassName() const { return AnnotatedPassName.c_str(); @@ -582,7 +620,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) { assert(FunctionSamples::ProfileIsProbeBased && "Profile is not pseudo probe based"); - Optional<PseudoProbe> Probe = extractProbe(Inst); + std::optional<PseudoProbe> Probe = extractProbe(Inst); // Ignore the non-probe instruction. If none of the instruction in the BB is // probe, we choose to infer the BB's weight. if (!Probe) @@ -735,7 +773,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( const FunctionSamples * SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { if (FunctionSamples::ProfileIsProbeBased) { - Optional<PseudoProbe> Probe = extractProbe(Inst); + std::optional<PseudoProbe> Probe = extractProbe(Inst); if (!Probe) return nullptr; } @@ -984,7 +1022,7 @@ bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) { void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates( const SmallVectorImpl<CallBase *> &Candidates, const Function &F, bool Hot) { - for (auto I : Candidates) { + for (auto *I : Candidates) { Function *CalledFunction = I->getCalledFunction(); if (CalledFunction) { ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), @@ -1106,7 +1144,7 @@ bool SampleProfileLoader::inlineHotFunctions( "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled"); - DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites; + MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites; bool Changed = false; bool LocalChanged = true; while (LocalChanged) { @@ -1116,7 +1154,7 @@ bool 
SampleProfileLoader::inlineHotFunctions( bool Hot = false; SmallVector<CallBase *, 10> AllCandidates; SmallVector<CallBase *, 10> ColdCandidates; - for (auto &I : BB.getInstList()) { + for (auto &I : BB) { const FunctionSamples *FS = nullptr; if (auto *CB = dyn_cast<CallBase>(&I)) { if (!isa<IntrinsicInst>(I)) { @@ -1126,7 +1164,7 @@ bool SampleProfileLoader::inlineHotFunctions( AllCandidates.push_back(CB); if (FS->getHeadSamplesEstimate() > 0 || FunctionSamples::ProfileIsCS) - LocalNotInlinedCallSites.try_emplace(CB, FS); + LocalNotInlinedCallSites.insert({CB, FS}); if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) Hot = true; else if (shouldInlineColdCallee(*CB)) @@ -1219,13 +1257,11 @@ bool SampleProfileLoader::tryInlineCandidate( InlineFunctionInfo IFI(nullptr, GetAC); IFI.UpdateProfile = false; - if (!InlineFunction(CB, IFI).isSuccess()) + InlineResult IR = InlineFunction(CB, IFI, + /*MergeAttributes=*/true); + if (!IR.isSuccess()) return false; - // Merge the attributes based on the inlining. - AttributeFuncs::mergeAttributesForInlining(*BB->getParent(), - *CalledFunction); - // The call to InlineFunction erases I, so we can't pass it here. emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, true, getAnnotatedRemarkPassName()); @@ -1250,7 +1286,7 @@ bool SampleProfileLoader::tryInlineCandidate( // aggregation of duplication. 
if (Candidate.CallsiteDistribution < 1) { for (auto &I : IFI.InlinedCallSites) { - if (Optional<PseudoProbe> Probe = extractProbe(*I)) + if (std::optional<PseudoProbe> Probe = extractProbe(*I)) setProbeDistributionFactor(*I, Probe->Factor * Candidate.CallsiteDistribution); } @@ -1275,7 +1311,7 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, return false; float Factor = 1.0; - if (Optional<PseudoProbe> Probe = extractProbe(*CB)) + if (std::optional<PseudoProbe> Probe = extractProbe(*CB)) Factor = Probe->Factor; uint64_t CallsiteCount = @@ -1284,7 +1320,7 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, return true; } -Optional<InlineCost> +std::optional<InlineCost> SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) { std::unique_ptr<InlineAdvice> Advice = nullptr; if (ExternalInlineAdvisor) { @@ -1303,15 +1339,15 @@ SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) { } bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) { - Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB); - return Cost ? !!Cost.value() : false; + std::optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB); + return Cost ? !!*Cost : false; } InlineCost SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { - if (Optional<InlineCost> ReplayCost = + if (std::optional<InlineCost> ReplayCost = getExternalInlineAdvisorCost(*Candidate.CallInstr)) - return ReplayCost.value(); + return *ReplayCost; // Adjust threshold based on call site hotness, only do this for callsite // prioritized inliner because otherwise cost-benefit check is done earlier. 
int SampleThreshold = SampleColdCallSiteThreshold; @@ -1387,7 +1423,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( CandidateQueue CQueue; InlineCandidate NewCandidate; for (auto &BB : F) { - for (auto &I : BB.getInstList()) { + for (auto &I : BB) { auto *CB = dyn_cast<CallBase>(&I); if (!CB) continue; @@ -1409,7 +1445,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( if (ExternalInlineAdvisor) SizeLimit = std::numeric_limits<unsigned>::max(); - DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites; + MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites; // Perform iterative BFS call site prioritized inlining bool Changed = false; @@ -1466,7 +1502,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( ICPCount++; Changed = true; } else if (!ContextTracker) { - LocalNotInlinedCallSites.try_emplace(I, FS); + LocalNotInlinedCallSites.insert({I, FS}); } } } else if (CalledFunction && CalledFunction->getSubprogram() && @@ -1479,7 +1515,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( } Changed = true; } else if (!ContextTracker) { - LocalNotInlinedCallSites.try_emplace(I, Candidate.CalleeSamples); + LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples}); } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { findExternalInlineCandidate(I, findCalleeFunctionSamples(*I), @@ -1505,11 +1541,11 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( } void SampleProfileLoader::promoteMergeNotInlinedContextSamples( - DenseMap<CallBase *, const FunctionSamples *> NonInlinedCallSites, + MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites, const Function &F) { // Accumulate not inlined callsite information into notInlinedSamples for (const auto &Pair : NonInlinedCallSites) { - CallBase *I = Pair.getFirst(); + CallBase *I = Pair.first; Function *Callee = I->getCalledFunction(); if (!Callee || Callee->isDeclaration()) continue; @@ -1521,7 +1557,7 @@ void 
SampleProfileLoader::promoteMergeNotInlinedContextSamples( << "' into '" << ore::NV("Caller", &F) << "'"); ++NumCSNotInlined; - const FunctionSamples *FS = Pair.getSecond(); + const FunctionSamples *FS = Pair.second; if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) { continue; } @@ -1581,7 +1617,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { BasicBlock *BB = &BI; if (BlockWeights[BB]) { - for (auto &I : BB->getInstList()) { + for (auto &I : *BB) { if (!isa<CallInst>(I) && !isa<InvokeInst>(I)) continue; if (!cast<CallBase>(I).getCalledFunction()) { @@ -1600,7 +1636,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { // Prorate the callsite counts based on the pre-ICP distribution // factor to reflect what is already done to the callsite before // ICP, such as callsite cloning. - if (Optional<PseudoProbe> Probe = extractProbe(I)) { + if (std::optional<PseudoProbe> Probe = extractProbe(I)) { if (Probe->Factor < 1) T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor); } @@ -1633,7 +1669,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { } else if (OverwriteExistingWeights || ProfileSampleBlockAccurate) { // Set profile metadata (possibly annotated by LTO prelink) to zero or // clear it for cold code. - for (auto &I : BB->getInstList()) { + for (auto &I : *BB) { if (isa<CallInst>(I) || isa<InvokeInst>(I)) { if (cast<CallBase>(I).isIndirectCall()) I.setMetadata(LLVMContext::MD_prof, nullptr); @@ -1704,10 +1740,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { } } - // FIXME: Re-enable for sample profiling after investigating why the sum - // of branch weights can be 0 - // - // misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); + misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); uint64_t TempWeight; // Only set weights if there is at least one non-zero weight. 
@@ -2013,9 +2046,156 @@ bool SampleProfileLoader::doInitialization(Module &M, } } + if (ReportProfileStaleness || PersistProfileStaleness) { + MatchingManager = + std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get()); + } + return true; } +void SampleProfileMatcher::detectProfileMismatch(const Function &F, + const FunctionSamples &FS) { + if (FunctionSamples::ProfileIsProbeBased) { + uint64_t Count = FS.getTotalSamples(); + TotalFuncHashSamples += Count; + TotalProfiledFunc++; + if (!ProbeManager->profileIsValid(F, FS)) { + MismatchedFuncHashSamples += Count; + NumMismatchedFuncHash++; + return; + } + } + + std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs; + + // Go through all the callsites on the IR and flag the callsite if the target + // name is the same as the one in the profile. + for (auto &BB : F) { + for (auto &I : BB) { + if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I)) + continue; + + const auto *CB = dyn_cast<CallBase>(&I); + if (auto &DLoc = I.getDebugLoc()) { + LineLocation IRCallsite = FunctionSamples::getCallSiteIdentifier(DLoc); + + StringRef CalleeName; + if (Function *Callee = CB->getCalledFunction()) + CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); + + const auto CTM = FS.findCallTargetMapAt(IRCallsite); + const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite); + + // Indirect call case. + if (CalleeName.empty()) { + // Since an indirect call does not have a CalleeName, check + // conservatively whether the profile has a callsite at this location. + // This avoids many false positives, since otherwise all the + // indirect call samples would be reported as mismatching. + if ((CTM && !CTM->empty()) || (CallsiteFS && !CallsiteFS->empty())) + MatchedCallsiteLocs.insert(IRCallsite); + } else { + // Check if the call target name is matched for direct call case. 
+ if ((CTM && CTM->count(CalleeName)) || + (CallsiteFS && CallsiteFS->count(CalleeName))) + MatchedCallsiteLocs.insert(IRCallsite); + } + } + } + } + + auto isInvalidLineOffset = [](uint32_t LineOffset) { + return LineOffset & 0x8000; + }; + + // Check if there are any callsites in the profile that do not match any + // IR callsite; those callsite samples will be discarded. + for (auto &I : FS.getBodySamples()) { + const LineLocation &Loc = I.first; + if (isInvalidLineOffset(Loc.LineOffset)) + continue; + + uint64_t Count = I.second.getSamples(); + if (!I.second.getCallTargets().empty()) { + TotalCallsiteSamples += Count; + TotalProfiledCallsites++; + if (!MatchedCallsiteLocs.count(Loc)) { + MismatchedCallsiteSamples += Count; + NumMismatchedCallsites++; + } + } + } + + for (auto &I : FS.getCallsiteSamples()) { + const LineLocation &Loc = I.first; + if (isInvalidLineOffset(Loc.LineOffset)) + continue; + + uint64_t Count = 0; + for (auto &FM : I.second) { + Count += FM.second.getHeadSamplesEstimate(); + } + TotalCallsiteSamples += Count; + TotalProfiledCallsites++; + if (!MatchedCallsiteLocs.count(Loc)) { + MismatchedCallsiteSamples += Count; + NumMismatchedCallsites++; + } + } +} + +void SampleProfileMatcher::detectProfileMismatch() { + for (auto &F : M) { + if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) + continue; + FunctionSamples *FS = Reader.getSamplesFor(F); + if (!FS) + continue; + detectProfileMismatch(F, *FS); + } + + if (ReportProfileStaleness) { + if (FunctionSamples::ProfileIsProbeBased) { + errs() << "(" << NumMismatchedFuncHash << "/" << TotalProfiledFunc << ")" + << " of functions' profile are invalid and " + << " (" << MismatchedFuncHashSamples << "/" << TotalFuncHashSamples + << ")" + << " of samples are discarded due to function hash mismatch.\n"; + } + errs() << "(" << NumMismatchedCallsites << "/" << TotalProfiledCallsites + << ")" + << " of callsites' profile are invalid and " + << "(" << MismatchedCallsiteSamples << 
"/" << TotalCallsiteSamples + << ")" + << " of samples are discarded due to callsite location mismatch.\n"; + } + + if (PersistProfileStaleness) { + LLVMContext &Ctx = M.getContext(); + MDBuilder MDB(Ctx); + + SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec; + if (FunctionSamples::ProfileIsProbeBased) { + ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash); + ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc); + ProfStatsVec.emplace_back("MismatchedFuncHashSamples", + MismatchedFuncHashSamples); + ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples); + } + + ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites); + ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites); + ProfStatsVec.emplace_back("MismatchedCallsiteSamples", + MismatchedCallsiteSamples); + ProfStatsVec.emplace_back("TotalCallsiteSamples", TotalCallsiteSamples); + + auto *MD = MDB.createLLVMStats(ProfStatsVec); + auto *NMD = M.getOrInsertNamedMetadata("llvm.stats"); + NMD->addOperand(MD); + } +} + bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); @@ -2060,8 +2240,11 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, assert(SymbolMap.count(StringRef()) == 0 && "No empty StringRef should be added in SymbolMap"); + if (ReportProfileStaleness || PersistProfileStaleness) + MatchingManager->detectProfileMismatch(); + bool retval = false; - for (auto F : buildFunctionOrder(M, CG)) { + for (auto *F : buildFunctionOrder(M, CG)) { assert(!F->isDeclaration()); clearFunctionData(); retval |= runOnFunction(*F, AM); |
