aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2023-02-11 12:38:04 +0000
committerDimitry Andric <dim@FreeBSD.org>2023-02-11 12:38:11 +0000
commite3b557809604d036af6e00c60f012c2025b59a5e (patch)
tree8a11ba2269a3b669601e2fd41145b174008f4da8 /llvm/lib/Transforms/IPO/SampleProfile.cpp
parent08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff)
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp255
1 files changed, 219 insertions, 36 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index f76b886e810a..93b368fd72a6 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -74,6 +75,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <algorithm>
@@ -127,6 +129,15 @@ static cl::opt<std::string> SampleProfileRemappingFile(
"sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
+// When set, SampleProfileMatcher::detectProfileMismatch() prints the
+// accumulated profile mismatch counters to errs().
+static cl::opt<bool> ReportProfileStaleness(
+    "report-profile-staleness", cl::init(false),
+    cl::desc("Compute and report stale profile statistical metrics."),
+    cl::Hidden);
+
+// When set, SampleProfileMatcher::detectProfileMismatch() appends the
+// accumulated profile mismatch counters to the "llvm.stats" named metadata of
+// the module.
+static cl::opt<bool> PersistProfileStaleness(
+    "persist-profile-staleness", cl::init(false),
+    cl::desc("Compute stale profile statistical metrics and write it into the "
+             "native object file(.llvm_stats section)."),
+    cl::Hidden);
+
static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
@@ -362,7 +373,7 @@ private:
FS->GUIDToFuncNameMap = Map;
for (const auto &ICS : FS->getCallsiteSamples()) {
const FunctionSamplesMap &FSMap = ICS.second;
- for (auto &IFS : FSMap) {
+ for (const auto &IFS : FSMap) {
FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
FSToUpdate.push(&FS);
}
@@ -412,6 +423,30 @@ using CandidateQueue =
PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
CandidateComparer>;
+/// Sample profile matching (fuzzy match).
+///
+/// Walks the functions of a module, compares the callsites found in the IR
+/// against the callsites recorded in the loaded profile, and accumulates
+/// statistics describing how much of the profile no longer matches.
+class SampleProfileMatcher {
+private:
+  /// Module whose functions are checked against the profile.
+  Module &M;
+  /// Source of the per-function samples.
+  SampleProfileReader &Reader;
+  /// Only dereferenced when the profile is probe based.
+  const PseudoProbeManager *ProbeManager;
+
+  // Function-level mismatch statistics; only updated for probe-based profiles.
+  uint64_t TotalProfiledFunc = 0;
+  uint64_t NumMismatchedFuncHash = 0;
+  uint64_t TotalFuncHashSamples = 0;
+  uint64_t MismatchedFuncHashSamples = 0;
+
+  // Callsite-level mismatch statistics.
+  uint64_t TotalProfiledCallsites = 0;
+  uint64_t NumMismatchedCallsites = 0;
+  uint64_t TotalCallsiteSamples = 0;
+  uint64_t MismatchedCallsiteSamples = 0;
+
+public:
+  SampleProfileMatcher(Module &Mod, SampleProfileReader &ProfileReader,
+                       const PseudoProbeManager *ProbeMgr)
+      : M(Mod), Reader(ProfileReader), ProbeManager(ProbeMgr) {}
+
+  /// Check every profiled function of the module, then report and/or persist
+  /// the accumulated statistics as requested on the command line.
+  void detectProfileMismatch();
+
+  /// Accumulate the mismatch statistics of function \p F against its
+  /// samples \p FS.
+  void detectProfileMismatch(const Function &F, const FunctionSamples &FS);
+};
+
/// Sample profile pass.
///
/// This pass reads profile data from the file specified by
@@ -459,7 +494,7 @@ protected:
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
- Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
+ std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
bool getExternalInlineAdvisorShouldInline(CallBase &CB);
InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
@@ -475,7 +510,7 @@ protected:
const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
bool Hot);
void promoteMergeNotInlinedContextSamples(
- DenseMap<CallBase *, const FunctionSamples *> NonInlinedCallSites,
+ MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
const Function &F);
std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
@@ -541,6 +576,9 @@ protected:
// A pseudo probe helper to correlate the imported sample counts.
std::unique_ptr<PseudoProbeManager> ProbeManager;
+ // A helper to implement the sample profile matching algorithm.
+ std::unique_ptr<SampleProfileMatcher> MatchingManager;
+
private:
const char *getAnnotatedRemarkPassName() const {
return AnnotatedPassName.c_str();
@@ -582,7 +620,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
assert(FunctionSamples::ProfileIsProbeBased &&
"Profile is not pseudo probe based");
- Optional<PseudoProbe> Probe = extractProbe(Inst);
+ std::optional<PseudoProbe> Probe = extractProbe(Inst);
// Ignore the non-probe instruction. If none of the instruction in the BB is
// probe, we choose to infer the BB's weight.
if (!Probe)
@@ -735,7 +773,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
if (FunctionSamples::ProfileIsProbeBased) {
- Optional<PseudoProbe> Probe = extractProbe(Inst);
+ std::optional<PseudoProbe> Probe = extractProbe(Inst);
if (!Probe)
return nullptr;
}
@@ -984,7 +1022,7 @@ bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
bool Hot) {
- for (auto I : Candidates) {
+ for (auto *I : Candidates) {
Function *CalledFunction = I->getCalledFunction();
if (CalledFunction) {
ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
@@ -1106,7 +1144,7 @@ bool SampleProfileLoader::inlineHotFunctions(
"ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled");
- DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
+ MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
bool Changed = false;
bool LocalChanged = true;
while (LocalChanged) {
@@ -1116,7 +1154,7 @@ bool SampleProfileLoader::inlineHotFunctions(
bool Hot = false;
SmallVector<CallBase *, 10> AllCandidates;
SmallVector<CallBase *, 10> ColdCandidates;
- for (auto &I : BB.getInstList()) {
+ for (auto &I : BB) {
const FunctionSamples *FS = nullptr;
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (!isa<IntrinsicInst>(I)) {
@@ -1126,7 +1164,7 @@ bool SampleProfileLoader::inlineHotFunctions(
AllCandidates.push_back(CB);
if (FS->getHeadSamplesEstimate() > 0 ||
FunctionSamples::ProfileIsCS)
- LocalNotInlinedCallSites.try_emplace(CB, FS);
+ LocalNotInlinedCallSites.insert({CB, FS});
if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
Hot = true;
else if (shouldInlineColdCallee(*CB))
@@ -1219,13 +1257,11 @@ bool SampleProfileLoader::tryInlineCandidate(
InlineFunctionInfo IFI(nullptr, GetAC);
IFI.UpdateProfile = false;
- if (!InlineFunction(CB, IFI).isSuccess())
+ InlineResult IR = InlineFunction(CB, IFI,
+ /*MergeAttributes=*/true);
+ if (!IR.isSuccess())
return false;
- // Merge the attributes based on the inlining.
- AttributeFuncs::mergeAttributesForInlining(*BB->getParent(),
- *CalledFunction);
-
// The call to InlineFunction erases I, so we can't pass it here.
emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
Cost, true, getAnnotatedRemarkPassName());
@@ -1250,7 +1286,7 @@ bool SampleProfileLoader::tryInlineCandidate(
// aggregation of duplication.
if (Candidate.CallsiteDistribution < 1) {
for (auto &I : IFI.InlinedCallSites) {
- if (Optional<PseudoProbe> Probe = extractProbe(*I))
+ if (std::optional<PseudoProbe> Probe = extractProbe(*I))
setProbeDistributionFactor(*I, Probe->Factor *
Candidate.CallsiteDistribution);
}
@@ -1275,7 +1311,7 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
return false;
float Factor = 1.0;
- if (Optional<PseudoProbe> Probe = extractProbe(*CB))
+ if (std::optional<PseudoProbe> Probe = extractProbe(*CB))
Factor = Probe->Factor;
uint64_t CallsiteCount =
@@ -1284,7 +1320,7 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
return true;
}
-Optional<InlineCost>
+std::optional<InlineCost>
SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
std::unique_ptr<InlineAdvice> Advice = nullptr;
if (ExternalInlineAdvisor) {
@@ -1303,15 +1339,15 @@ SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
}
bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
- Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
- return Cost ? !!Cost.value() : false;
+ std::optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
+ return Cost ? !!*Cost : false;
}
InlineCost
SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
- if (Optional<InlineCost> ReplayCost =
+ if (std::optional<InlineCost> ReplayCost =
getExternalInlineAdvisorCost(*Candidate.CallInstr))
- return ReplayCost.value();
+ return *ReplayCost;
// Adjust threshold based on call site hotness, only do this for callsite
// prioritized inliner because otherwise cost-benefit check is done earlier.
int SampleThreshold = SampleColdCallSiteThreshold;
@@ -1387,7 +1423,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
CandidateQueue CQueue;
InlineCandidate NewCandidate;
for (auto &BB : F) {
- for (auto &I : BB.getInstList()) {
+ for (auto &I : BB) {
auto *CB = dyn_cast<CallBase>(&I);
if (!CB)
continue;
@@ -1409,7 +1445,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
if (ExternalInlineAdvisor)
SizeLimit = std::numeric_limits<unsigned>::max();
- DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
+ MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
// Perform iterative BFS call site prioritized inlining
bool Changed = false;
@@ -1466,7 +1502,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
ICPCount++;
Changed = true;
} else if (!ContextTracker) {
- LocalNotInlinedCallSites.try_emplace(I, FS);
+ LocalNotInlinedCallSites.insert({I, FS});
}
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
@@ -1479,7 +1515,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
}
Changed = true;
} else if (!ContextTracker) {
- LocalNotInlinedCallSites.try_emplace(I, Candidate.CalleeSamples);
+ LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples});
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
@@ -1505,11 +1541,11 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
}
void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
- DenseMap<CallBase *, const FunctionSamples *> NonInlinedCallSites,
+ MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
const Function &F) {
// Accumulate not inlined callsite information into notInlinedSamples
for (const auto &Pair : NonInlinedCallSites) {
- CallBase *I = Pair.getFirst();
+ CallBase *I = Pair.first;
Function *Callee = I->getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;
@@ -1521,7 +1557,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
<< "' into '" << ore::NV("Caller", &F) << "'");
++NumCSNotInlined;
- const FunctionSamples *FS = Pair.getSecond();
+ const FunctionSamples *FS = Pair.second;
if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
continue;
}
@@ -1581,7 +1617,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
BasicBlock *BB = &BI;
if (BlockWeights[BB]) {
- for (auto &I : BB->getInstList()) {
+ for (auto &I : *BB) {
if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
continue;
if (!cast<CallBase>(I).getCalledFunction()) {
@@ -1600,7 +1636,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
// Prorate the callsite counts based on the pre-ICP distribution
// factor to reflect what is already done to the callsite before
// ICP, such as callsite cloning.
- if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+ if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
if (Probe->Factor < 1)
T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
}
@@ -1633,7 +1669,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
} else if (OverwriteExistingWeights || ProfileSampleBlockAccurate) {
// Set profile metadata (possibly annotated by LTO prelink) to zero or
// clear it for cold code.
- for (auto &I : BB->getInstList()) {
+ for (auto &I : *BB) {
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
if (cast<CallBase>(I).isIndirectCall())
I.setMetadata(LLVMContext::MD_prof, nullptr);
@@ -1704,10 +1740,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
}
}
- // FIXME: Re-enable for sample profiling after investigating why the sum
- // of branch weights can be 0
- //
- // misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
+ misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
@@ -2013,9 +2046,156 @@ bool SampleProfileLoader::doInitialization(Module &M,
}
}
+ if (ReportProfileStaleness || PersistProfileStaleness) {
+ MatchingManager =
+ std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
+ }
+
return true;
}
+/// Accumulate profile mismatch statistics for a single function.
+///
+/// \param F  The IR function being checked.
+/// \param FS The profile samples associated with \p F.
+///
+/// For probe-based profiles the function is first validated through the
+/// pseudo probe manager; a function that fails validation is counted as a
+/// function hash mismatch and its callsites are not examined. Otherwise every
+/// callsite location recorded in the profile is counted, and those that do not
+/// correspond to a matching callsite in the IR are counted as mismatched.
+void SampleProfileMatcher::detectProfileMismatch(const Function &F,
+                                                 const FunctionSamples &FS) {
+  if (FunctionSamples::ProfileIsProbeBased) {
+    assert(ProbeManager &&
+           "Probe-based profile matching requires a pseudo probe manager");
+    uint64_t Count = FS.getTotalSamples();
+    TotalFuncHashSamples += Count;
+    TotalProfiledFunc++;
+    if (!ProbeManager->profileIsValid(F, FS)) {
+      MismatchedFuncHashSamples += Count;
+      NumMismatchedFuncHash++;
+      return;
+    }
+  }
+
+  std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs;
+
+  // Go through all the callsites on the IR and flag the callsite if the target
+  // name is the same as the one in the profile.
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      // A single checked dyn_cast both filters out non-call instructions and
+      // yields the call, so the pointer used below is never null.
+      const auto *CB = dyn_cast<CallBase>(&I);
+      if (!CB || isa<IntrinsicInst>(&I))
+        continue;
+
+      // Callsites without a debug location cannot be mapped to the profile.
+      auto &DLoc = I.getDebugLoc();
+      if (!DLoc)
+        continue;
+
+      LineLocation IRCallsite = FunctionSamples::getCallSiteIdentifier(DLoc);
+
+      StringRef CalleeName;
+      if (Function *Callee = CB->getCalledFunction())
+        CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
+
+      const auto CTM = FS.findCallTargetMapAt(IRCallsite);
+      const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite);
+
+      bool IsMatched;
+      if (CalleeName.empty()) {
+        // Indirect call case. Since an indirect call does not have a callee
+        // name, conservatively accept the location if the profile records any
+        // callsite there. This avoids a large number of false positives:
+        // otherwise all indirect call samples would be reported as
+        // mismatching.
+        IsMatched =
+            (CTM && !CTM->empty()) || (CallsiteFS && !CallsiteFS->empty());
+      } else {
+        // Direct call case: the call target name has to match.
+        IsMatched = (CTM && CTM->count(CalleeName)) ||
+                    (CallsiteFS && CallsiteFS->count(CalleeName));
+      }
+      if (IsMatched)
+        MatchedCallsiteLocs.insert(IRCallsite);
+    }
+  }
+
+  // NOTE(review): locations with bit 15 of the line offset set are skipped;
+  // presumably these encode offsets that could not be represented - confirm.
+  auto isInvalidLineOffset = [](uint32_t LineOffset) -> bool {
+    return (LineOffset & 0x8000) != 0;
+  };
+
+  // Count one profiled callsite carrying Count samples, and additionally count
+  // it as mismatched when no IR callsite was matched at that location.
+  auto recordProfiledCallsite = [&](const LineLocation &Loc, uint64_t Count) {
+    TotalCallsiteSamples += Count;
+    TotalProfiledCallsites++;
+    if (!MatchedCallsiteLocs.count(Loc)) {
+      MismatchedCallsiteSamples += Count;
+      NumMismatchedCallsites++;
+    }
+  };
+
+  // Check whether there are any callsites in the profile that do not match any
+  // IR callsite; the samples of those callsites will be discarded.
+  for (auto &I : FS.getBodySamples()) {
+    const LineLocation &Loc = I.first;
+    if (isInvalidLineOffset(Loc.LineOffset))
+      continue;
+
+    uint64_t Count = I.second.getSamples();
+    // Only body samples that carry call targets describe a callsite.
+    if (!I.second.getCallTargets().empty())
+      recordProfiledCallsite(Loc, Count);
+  }
+
+  for (auto &I : FS.getCallsiteSamples()) {
+    const LineLocation &Loc = I.first;
+    if (isInvalidLineOffset(Loc.LineOffset))
+      continue;
+
+    // The sample count of the callsite is the sum of the head sample
+    // estimates of all the function samples recorded at this location.
+    uint64_t Count = 0;
+    for (auto &FM : I.second)
+      Count += FM.second.getHeadSamplesEstimate();
+    recordProfiledCallsite(Loc, Count);
+  }
+}
+
+/// Run the mismatch detection over the whole module and publish the result.
+///
+/// Every defined function that carries the "use-sample-profile" attribute and
+/// has samples in the reader is checked. The accumulated counters are then
+/// printed to errs() when -report-profile-staleness is set, and appended to
+/// the "llvm.stats" named metadata when -persist-profile-staleness is set.
+void SampleProfileMatcher::detectProfileMismatch() {
+  for (auto &Func : M) {
+    const bool UsesSampleProfile =
+        !Func.isDeclaration() && Func.hasFnAttribute("use-sample-profile");
+    if (!UsesSampleProfile)
+      continue;
+    if (FunctionSamples *Samples = Reader.getSamplesFor(Func))
+      detectProfileMismatch(Func, *Samples);
+  }
+
+  // The function hash statistics are only collected for probe-based profiles.
+  const bool IsProbeBased = FunctionSamples::ProfileIsProbeBased;
+
+  if (ReportProfileStaleness) {
+    raw_ostream &OS = errs();
+    if (IsProbeBased) {
+      OS << "(" << NumMismatchedFuncHash << "/" << TotalProfiledFunc << ")";
+      OS << " of functions' profile are invalid and ";
+      OS << " (" << MismatchedFuncHashSamples << "/" << TotalFuncHashSamples
+         << ")";
+      OS << " of samples are discarded due to function hash mismatch.\n";
+    }
+    OS << "(" << NumMismatchedCallsites << "/" << TotalProfiledCallsites
+       << ")";
+    OS << " of callsites' profile are invalid and ";
+    OS << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
+       << ")";
+    OS << " of samples are discarded due to callsite location mismatch.\n";
+  }
+
+  if (!PersistProfileStaleness)
+    return;
+
+  // Collect the counters as (name, value) pairs, function hash statistics
+  // first, followed by the callsite statistics.
+  SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
+  auto addStat = [&ProfStatsVec](StringRef Name, uint64_t Value) {
+    ProfStatsVec.emplace_back(Name, Value);
+  };
+
+  if (IsProbeBased) {
+    addStat("NumMismatchedFuncHash", NumMismatchedFuncHash);
+    addStat("TotalProfiledFunc", TotalProfiledFunc);
+    addStat("MismatchedFuncHashSamples", MismatchedFuncHashSamples);
+    addStat("TotalFuncHashSamples", TotalFuncHashSamples);
+  }
+  addStat("NumMismatchedCallsites", NumMismatchedCallsites);
+  addStat("TotalProfiledCallsites", TotalProfiledCallsites);
+  addStat("MismatchedCallsiteSamples", MismatchedCallsiteSamples);
+  addStat("TotalCallsiteSamples", TotalCallsiteSamples);
+
+  // Attach the statistics to the module as an operand of "llvm.stats".
+  MDBuilder MDB(M.getContext());
+  auto *StatsNode = MDB.createLLVMStats(ProfStatsVec);
+  auto *StatsMD = M.getOrInsertNamedMetadata("llvm.stats");
+  StatsMD->addOperand(StatsNode);
+}
+
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI, CallGraph *CG) {
GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
@@ -2060,8 +2240,11 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
assert(SymbolMap.count(StringRef()) == 0 &&
"No empty StringRef should be added in SymbolMap");
+ if (ReportProfileStaleness || PersistProfileStaleness)
+ MatchingManager->detectProfileMismatch();
+
bool retval = false;
- for (auto F : buildFunctionOrder(M, CG)) {
+ for (auto *F : buildFunctionOrder(M, CG)) {
assert(!F->isDeclaration());
clearFunctionData();
retval |= runOnFunction(*F, AM);