aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp569
1 files changed, 414 insertions, 155 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 93b368fd72a6..a53baecd4776 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -35,9 +35,9 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
@@ -58,8 +58,6 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
@@ -67,6 +65,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
@@ -129,6 +128,11 @@ static cl::opt<std::string> SampleProfileRemappingFile(
"sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
+static cl::opt<bool> SalvageStaleProfile(
+ "salvage-stale-profile", cl::Hidden, cl::init(false),
+ cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
+ "location for sample profile query."));
+
static cl::opt<bool> ReportProfileStaleness(
"report-profile-staleness", cl::Hidden, cl::init(false),
cl::desc("Compute and report stale profile statistical metrics."));
@@ -138,6 +142,11 @@ static cl::opt<bool> PersistProfileStaleness(
cl::desc("Compute stale profile statistical metrics and write it into the "
"native object file(.llvm_stats section)."));
+static cl::opt<bool> FlattenProfileForMatching(
+ "flatten-profile-for-matching", cl::Hidden, cl::init(true),
+ cl::desc(
+ "Use flattened profile for stale profile detection and matching."));
+
static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
@@ -173,9 +182,6 @@ static cl::opt<bool>
cl::desc("Process functions in a top-down order "
"defined by the profiled call graph when "
"-sample-profile-top-down-load is on."));
-cl::opt<bool>
- SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
- cl::desc("Sort profiled recursion by edge weights."));
static cl::opt<bool> ProfileSizeInline(
"sample-profile-inline-size", cl::Hidden, cl::init(false),
@@ -191,6 +197,11 @@ static cl::opt<bool> DisableSampleLoaderInlining(
"pass, and merge (or scale) profiles (as configured by "
"--sample-profile-merge-inlinee)."));
+namespace llvm {
+cl::opt<bool>
+ SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
+ cl::desc("Sort profiled recursion by edge weights."));
+
cl::opt<int> ProfileInlineGrowthLimit(
"sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
cl::desc("The size growth ratio limit for proirity-based sample profile "
@@ -214,6 +225,7 @@ cl::opt<int> SampleHotCallSiteThreshold(
cl::opt<int> SampleColdCallSiteThreshold(
"sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
cl::desc("Threshold for inlining cold callsites"));
+} // namespace llvm
static cl::opt<unsigned> ProfileICPRelativeHotness(
"sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
@@ -307,7 +319,9 @@ static cl::opt<bool> AnnotateSampleProfileInlinePhase(
cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
"sample-profile inline pass name."));
+namespace llvm {
extern cl::opt<bool> EnableExtTspBlockPlacement;
+}
namespace {
@@ -428,6 +442,11 @@ class SampleProfileMatcher {
Module &M;
SampleProfileReader &Reader;
const PseudoProbeManager *ProbeManager;
+ SampleProfileMap FlattenedProfiles;
+ // For each function, the matcher generates a map, of which each entry is a
+ // mapping from the source location of current build to the source location in
+ // the profile.
+ StringMap<LocToLocMap> FuncMappings;
// Profile mismatching statstics.
uint64_t TotalProfiledCallsites = 0;
@@ -442,9 +461,43 @@ class SampleProfileMatcher {
public:
SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
const PseudoProbeManager *ProbeManager)
- : M(M), Reader(Reader), ProbeManager(ProbeManager) {}
- void detectProfileMismatch();
- void detectProfileMismatch(const Function &F, const FunctionSamples &FS);
+ : M(M), Reader(Reader), ProbeManager(ProbeManager) {
+ if (FlattenProfileForMatching) {
+ ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
+ FunctionSamples::ProfileIsCS);
+ }
+ }
+ void runOnModule();
+
+private:
+ FunctionSamples *getFlattenedSamplesFor(const Function &F) {
+ StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
+ auto It = FlattenedProfiles.find(CanonFName);
+ if (It != FlattenedProfiles.end())
+ return &It->second;
+ return nullptr;
+ }
+ void runOnFunction(const Function &F, const FunctionSamples &FS);
+ void countProfileMismatches(
+ const FunctionSamples &FS,
+ const std::unordered_set<LineLocation, LineLocationHash>
+ &MatchedCallsiteLocs,
+ uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites);
+
+ LocToLocMap &getIRToProfileLocationMap(const Function &F) {
+ auto Ret = FuncMappings.try_emplace(
+ FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap());
+ return Ret.first->second;
+ }
+ void distributeIRToProfileLocationMap();
+ void distributeIRToProfileLocationMap(FunctionSamples &FS);
+ void populateProfileCallsites(
+ const FunctionSamples &FS,
+ StringMap<std::set<LineLocation>> &CalleeToCallsitesMap);
+ void runStaleProfileMatching(
+ const std::map<LineLocation, StringRef> &IRLocations,
+ StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
+ LocToLocMap &IRToProfileLocationMap);
};
/// Sample profile pass.
@@ -452,15 +505,16 @@ public:
/// This pass reads profile data from the file specified by
/// -sample-profile-file and annotates every affected function with the
/// profile information found in that file.
-class SampleProfileLoader final
- : public SampleProfileLoaderBaseImpl<BasicBlock> {
+class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
public:
SampleProfileLoader(
StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
std::function<const TargetLibraryInfo &(Function &)> GetTLI)
- : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
+ std::move(FS)),
GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
LTOPhase(LTOPhase),
@@ -471,13 +525,12 @@ public:
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
- ProfileSummaryInfo *_PSI, CallGraph *CG);
+ ProfileSummaryInfo *_PSI, LazyCallGraph &CG);
protected:
bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
bool emitAnnotations(Function &F);
ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
- ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
const FunctionSamples *
findFunctionSamples(const Instruction &I) const override;
@@ -512,8 +565,8 @@ protected:
void promoteMergeNotInlinedContextSamples(
MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
const Function &F);
- std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
- std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
+ std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
+ std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
void generateMDProfMetadata(Function &F);
/// Map from function name to Function *. Used to find the function from
@@ -573,9 +626,6 @@ protected:
// External inline advisor used to replay inline decision from remarks.
std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
- // A pseudo probe helper to correlate the imported sample counts.
- std::unique_ptr<PseudoProbeManager> ProbeManager;
-
// A helper to implement the sample profile matching algorithm.
std::unique_ptr<SampleProfileMatcher> MatchingManager;
@@ -586,6 +636,50 @@ private:
};
} // end anonymous namespace
+namespace llvm {
+template <>
+inline bool SampleProfileInference<Function>::isExit(const BasicBlock *BB) {
+ return succ_empty(BB);
+}
+
+template <>
+inline void SampleProfileInference<Function>::findUnlikelyJumps(
+ const std::vector<const BasicBlockT *> &BasicBlocks,
+ BlockEdgeMap &Successors, FlowFunction &Func) {
+ for (auto &Jump : Func.Jumps) {
+ const auto *BB = BasicBlocks[Jump.Source];
+ const auto *Succ = BasicBlocks[Jump.Target];
+ const Instruction *TI = BB->getTerminator();
+ // Check if a block ends with InvokeInst and mark non-taken branch unlikely.
+ // In that case block Succ should be a landing pad
+ if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) {
+ if (isa<InvokeInst>(TI)) {
+ Jump.IsUnlikely = true;
+ }
+ }
+ const Instruction *SuccTI = Succ->getTerminator();
+ // Check if the target block contains UnreachableInst and mark it unlikely
+ if (SuccTI->getNumSuccessors() == 0) {
+ if (isa<UnreachableInst>(SuccTI)) {
+ Jump.IsUnlikely = true;
+ }
+ }
+ }
+}
+
+template <>
+void SampleProfileLoaderBaseImpl<Function>::computeDominanceAndLoopInfo(
+ Function &F) {
+ DT.reset(new DominatorTree);
+ DT->recalculate(F);
+
+ PDT.reset(new PostDominatorTree(F));
+
+ LI.reset(new LoopInfo);
+ LI->analyze(*DT);
+}
+} // namespace llvm
+
ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
if (FunctionSamples::ProfileIsProbeBased)
return getProbeWeight(Inst);
@@ -614,68 +708,6 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
return getInstWeightImpl(Inst);
}
-// Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
-// of non-probe instruction. So if all instructions of the BB give error_code,
-// tell the inference algorithm to infer the BB weight.
-ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
- assert(FunctionSamples::ProfileIsProbeBased &&
- "Profile is not pseudo probe based");
- std::optional<PseudoProbe> Probe = extractProbe(Inst);
- // Ignore the non-probe instruction. If none of the instruction in the BB is
- // probe, we choose to infer the BB's weight.
- if (!Probe)
- return std::error_code();
-
- const FunctionSamples *FS = findFunctionSamples(Inst);
- // If none of the instruction has FunctionSample, we choose to return zero
- // value sample to indicate the BB is cold. This could happen when the
- // instruction is from inlinee and no profile data is found.
- // FIXME: This should not be affected by the source drift issue as 1) if the
- // newly added function is top-level inliner, it won't match the CFG checksum
- // in the function profile or 2) if it's the inlinee, the inlinee should have
- // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
- // we can improve it by adding a switch for profile-sample-block-accurate for
- // block level counts in the future.
- if (!FS)
- return 0;
-
- // For non-CS profile, If a direct call/invoke instruction is inlined in
- // profile (findCalleeFunctionSamples returns non-empty result), but not
- // inlined here, it means that the inlined callsite has no sample, thus the
- // call instruction should have 0 count.
- // For CS profile, the callsite count of previously inlined callees is
- // populated with the entry count of the callees.
- if (!FunctionSamples::ProfileIsCS)
- if (const auto *CB = dyn_cast<CallBase>(&Inst))
- if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
- return 0;
-
- const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
- if (R) {
- uint64_t Samples = R.get() * Probe->Factor;
- bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
- if (FirstMark) {
- ORE->emit([&]() {
- OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
- Remark << "Applied " << ore::NV("NumSamples", Samples);
- Remark << " samples from profile (ProbeId=";
- Remark << ore::NV("ProbeId", Probe->Id);
- Remark << ", Factor=";
- Remark << ore::NV("Factor", Probe->Factor);
- Remark << ", OriginalSamples=";
- Remark << ore::NV("OriginalSamples", R.get());
- Remark << ")";
- return Remark;
- });
- }
- LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
- << " - weight: " << R.get() << " - factor: "
- << format("%0.2f", Probe->Factor) << ")\n");
- return Samples;
- }
- return R;
-}
-
/// Get the FunctionSamples for a call instruction.
///
/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
@@ -1041,8 +1073,8 @@ void SampleProfileLoader::findExternalInlineCandidate(
DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
- // If ExternalInlineAdvisor wants to inline an external function
- // make sure it's imported
+ // If ExternalInlineAdvisor(ReplayInlineAdvisor) wants to inline an external
+ // function make sure it's imported
if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
// Samples may not exist for replayed function, if so
// just add the direct GUID and move on
@@ -1055,7 +1087,13 @@ void SampleProfileLoader::findExternalInlineCandidate(
Threshold = 0;
}
- assert(Samples && "expect non-null caller profile");
+ // In some rare cases, call instruction could be changed after being pushed
+ // into inline candidate queue, this is because earlier inlining may expose
+ // constant propagation which can change indirect call to direct call. When
+ // this happens, we may fail to find matching function samples for the
+ // candidate later, even if a match was found when the candidate was enqueued.
+ if (!Samples)
+ return;
// For AutoFDO profile, retrieve candidate profiles by walking over
// the nested inlinee profiles.
@@ -1255,7 +1293,7 @@ bool SampleProfileLoader::tryInlineCandidate(
if (!Cost)
return false;
- InlineFunctionInfo IFI(nullptr, GetAC);
+ InlineFunctionInfo IFI(GetAC);
IFI.UpdateProfile = false;
InlineResult IR = InlineFunction(CB, IFI,
/*MergeAttributes=*/true);
@@ -1784,9 +1822,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
if (!ProbeManager->profileIsValid(F, *Samples)) {
LLVM_DEBUG(
dbgs() << "Profile is invalid due to CFG mismatch for Function "
- << F.getName());
+ << F.getName() << "\n");
++NumMismatchedProfile;
- return false;
+ if (!SalvageStaleProfile)
+ return false;
}
++NumMatchedProfile;
} else {
@@ -1813,7 +1852,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
}
std::unique_ptr<ProfiledCallGraph>
-SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
+SampleProfileLoader::buildProfiledCallGraph(Module &M) {
std::unique_ptr<ProfiledCallGraph> ProfiledCG;
if (FunctionSamples::ProfileIsCS)
ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
@@ -1823,18 +1862,17 @@ SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
// Add all functions into the profiled call graph even if they are not in
// the profile. This makes sure functions missing from the profile still
// gets a chance to be processed.
- for (auto &Node : CG) {
- const auto *F = Node.first;
- if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
+ for (Function &F : M) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
+ ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(F));
}
return ProfiledCG;
}
std::vector<Function *>
-SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
+SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
std::vector<Function *> FunctionOrderList;
FunctionOrderList.reserve(M.size());
@@ -1842,7 +1880,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
"together with -sample-profile-top-down-load.\n";
- if (!ProfileTopDownLoad || CG == nullptr) {
+ if (!ProfileTopDownLoad) {
if (ProfileMergeInlinee) {
// Disable ProfileMergeInlinee if profile is not loaded in top down order,
// because the profile for a function may be used for the profile
@@ -1858,8 +1896,6 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
return FunctionOrderList;
}
- assert(&CG->getModule() == &M);
-
if (UseProfiledCallGraph || (FunctionSamples::ProfileIsCS &&
!UseProfiledCallGraph.getNumOccurrences())) {
// Use profiled call edges to augment the top-down order. There are cases
@@ -1910,7 +1946,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
// static call edges are not so important when they don't correspond to a
// context in the profile.
- std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
+ std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
while (!CGI.isAtEnd()) {
auto Range = *CGI;
@@ -1927,25 +1963,27 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
++CGI;
}
} else {
- scc_iterator<CallGraph *> CGI = scc_begin(CG);
- while (!CGI.isAtEnd()) {
- for (CallGraphNode *Node : *CGI) {
- auto *F = Node->getFunction();
- if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
- FunctionOrderList.push_back(F);
+ CG.buildRefSCCs();
+ for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
+ for (LazyCallGraph::SCC &C : RC) {
+ for (LazyCallGraph::Node &N : C) {
+ Function &F = N.getFunction();
+ if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
+ FunctionOrderList.push_back(&F);
+ }
}
- ++CGI;
}
}
+ std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
+
LLVM_DEBUG({
dbgs() << "Function processing order:\n";
- for (auto F : reverse(FunctionOrderList)) {
+ for (auto F : FunctionOrderList) {
dbgs() << F->getName() << "\n";
}
});
- std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
return FunctionOrderList;
}
@@ -1954,7 +1992,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
auto &Ctx = M.getContext();
auto ReaderOrErr = SampleProfileReader::create(
- Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
+ Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
@@ -2016,6 +2054,16 @@ bool SampleProfileLoader::doInitialization(Module &M,
UsePreInlinerDecision = true;
}
+ // Enable stale profile matching by default for probe-based profile.
+ // Currently the matching relies on if the checksum mismatch is detected,
+ // which is currently only available for pseudo-probe mode. Removing the
+ // checksum check could cause regressions for some cases, so further tuning
+ // might be needed if we want to enable it for all cases.
+ if (Reader->profileIsProbeBased() &&
+ !SalvageStaleProfile.getNumOccurrences()) {
+ SalvageStaleProfile = true;
+ }
+
if (!Reader->profileIsCS()) {
// Non-CS profile should be fine without a function size budget for the
// inliner since the contexts in the profile are either all from inlining
@@ -2046,7 +2094,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
}
}
- if (ReportProfileStaleness || PersistProfileStaleness) {
+ if (ReportProfileStaleness || PersistProfileStaleness ||
+ SalvageStaleProfile) {
MatchingManager =
std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
}
@@ -2054,8 +2103,167 @@ bool SampleProfileLoader::doInitialization(Module &M,
return true;
}
-void SampleProfileMatcher::detectProfileMismatch(const Function &F,
- const FunctionSamples &FS) {
+void SampleProfileMatcher::countProfileMismatches(
+ const FunctionSamples &FS,
+ const std::unordered_set<LineLocation, LineLocationHash>
+ &MatchedCallsiteLocs,
+ uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) {
+
+ auto isInvalidLineOffset = [](uint32_t LineOffset) {
+ return LineOffset & 0x8000;
+ };
+
+ // Check if there are any callsites in the profile that does not match to any
+ // IR callsites, those callsite samples will be discarded.
+ for (auto &I : FS.getBodySamples()) {
+ const LineLocation &Loc = I.first;
+ if (isInvalidLineOffset(Loc.LineOffset))
+ continue;
+
+ uint64_t Count = I.second.getSamples();
+ if (!I.second.getCallTargets().empty()) {
+ TotalCallsiteSamples += Count;
+ FuncProfiledCallsites++;
+ if (!MatchedCallsiteLocs.count(Loc)) {
+ MismatchedCallsiteSamples += Count;
+ FuncMismatchedCallsites++;
+ }
+ }
+ }
+
+ for (auto &I : FS.getCallsiteSamples()) {
+ const LineLocation &Loc = I.first;
+ if (isInvalidLineOffset(Loc.LineOffset))
+ continue;
+
+ uint64_t Count = 0;
+ for (auto &FM : I.second) {
+ Count += FM.second.getHeadSamplesEstimate();
+ }
+ TotalCallsiteSamples += Count;
+ FuncProfiledCallsites++;
+ if (!MatchedCallsiteLocs.count(Loc)) {
+ MismatchedCallsiteSamples += Count;
+ FuncMismatchedCallsites++;
+ }
+ }
+}
+
+// Populate the anchors(direct callee name) from profile.
+void SampleProfileMatcher::populateProfileCallsites(
+ const FunctionSamples &FS,
+ StringMap<std::set<LineLocation>> &CalleeToCallsitesMap) {
+ for (const auto &I : FS.getBodySamples()) {
+ const auto &Loc = I.first;
+ const auto &CTM = I.second.getCallTargets();
+ // Filter out possible indirect calls, use direct callee name as anchor.
+ if (CTM.size() == 1) {
+ StringRef CalleeName = CTM.begin()->first();
+ const auto &Candidates = CalleeToCallsitesMap.try_emplace(
+ CalleeName, std::set<LineLocation>());
+ Candidates.first->second.insert(Loc);
+ }
+ }
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ const LineLocation &Loc = I.first;
+ const auto &CalleeMap = I.second;
+ // Filter out possible indirect calls, use direct callee name as anchor.
+ if (CalleeMap.size() == 1) {
+ StringRef CalleeName = CalleeMap.begin()->first;
+ const auto &Candidates = CalleeToCallsitesMap.try_emplace(
+ CalleeName, std::set<LineLocation>());
+ Candidates.first->second.insert(Loc);
+ }
+ }
+}
+
+// Call target name anchor based profile fuzzy matching.
+// Input:
+// For IR locations, the anchor is the callee name of direct callsite; For
+// profile locations, it's the call target name for BodySamples or inlinee's
+// profile name for CallsiteSamples.
+// Matching heuristic:
+// First match all the anchors in lexical order, then split the non-anchor
+// locations between the two anchors evenly, first half are matched based on the
+// start anchor, second half are matched based on the end anchor.
+// For example, given:
+// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
+// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
+// The matching gives:
+// [1, 2(foo), 3, 5, 6(bar), 7]
+// | | | | | |
+// [1, 2, 3(foo), 4, 7, 8(bar), 9]
+// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
+void SampleProfileMatcher::runStaleProfileMatching(
+ const std::map<LineLocation, StringRef> &IRLocations,
+ StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
+ LocToLocMap &IRToProfileLocationMap) {
+ assert(IRToProfileLocationMap.empty() &&
+ "Run stale profile matching only once per function");
+
+ auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
+ // Skip the unchanged location mapping to save memory.
+ if (From != To)
+ IRToProfileLocationMap.insert({From, To});
+ };
+
+ // Use function's beginning location as the initial anchor.
+ int32_t LocationDelta = 0;
+ SmallVector<LineLocation> LastMatchedNonAnchors;
+
+ for (const auto &IR : IRLocations) {
+ const auto &Loc = IR.first;
+ StringRef CalleeName = IR.second;
+ bool IsMatchedAnchor = false;
+ // Match the anchor location in lexical order.
+ if (!CalleeName.empty()) {
+ auto ProfileAnchors = CalleeToCallsitesMap.find(CalleeName);
+ if (ProfileAnchors != CalleeToCallsitesMap.end() &&
+ !ProfileAnchors->second.empty()) {
+ auto CI = ProfileAnchors->second.begin();
+ const auto Candidate = *CI;
+ ProfileAnchors->second.erase(CI);
+ InsertMatching(Loc, Candidate);
+ LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
+ << " is matched from " << Loc << " to " << Candidate
+ << "\n");
+ LocationDelta = Candidate.LineOffset - Loc.LineOffset;
+
+ // Match backwards for non-anchor locations.
+ // The locations in LastMatchedNonAnchors have been matched forwards
+ // based on the previous anchor, spilt it evenly and overwrite the
+ // second half based on the current anchor.
+ for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
+ I < LastMatchedNonAnchors.size(); I++) {
+ const auto &L = LastMatchedNonAnchors[I];
+ uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
+ LineLocation Candidate(CandidateLineOffset, L.Discriminator);
+ InsertMatching(L, Candidate);
+ LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
+ << " to " << Candidate << "\n");
+ }
+
+ IsMatchedAnchor = true;
+ LastMatchedNonAnchors.clear();
+ }
+ }
+
+ // Match forwards for non-anchor locations.
+ if (!IsMatchedAnchor) {
+ uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
+ LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
+ InsertMatching(Loc, Candidate);
+ LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
+ << Candidate << "\n");
+ LastMatchedNonAnchors.emplace_back(Loc);
+ }
+ }
+}
+
+void SampleProfileMatcher::runOnFunction(const Function &F,
+ const FunctionSamples &FS) {
+ bool IsFuncHashMismatch = false;
if (FunctionSamples::ProfileIsProbeBased) {
uint64_t Count = FS.getTotalSamples();
TotalFuncHashSamples += Count;
@@ -2063,16 +2271,24 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
if (!ProbeManager->profileIsValid(F, FS)) {
MismatchedFuncHashSamples += Count;
NumMismatchedFuncHash++;
- return;
+ IsFuncHashMismatch = true;
}
}
std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs;
+ // The value of the map is the name of direct callsite and use empty StringRef
+ // for non-direct-call site.
+ std::map<LineLocation, StringRef> IRLocations;
- // Go through all the callsites on the IR and flag the callsite if the target
- // name is the same as the one in the profile.
+ // Extract profile matching anchors and profile mismatch metrics in the IR.
for (auto &BB : F) {
for (auto &I : BB) {
+ // TODO: Support line-number based location(AutoFDO).
+ if (FunctionSamples::ProfileIsProbeBased && isa<PseudoProbeInst>(&I)) {
+ if (std::optional<PseudoProbe> Probe = extractProbe(I))
+ IRLocations.emplace(LineLocation(Probe->Id, 0), StringRef());
+ }
+
if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
continue;
@@ -2084,6 +2300,17 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
if (Function *Callee = CB->getCalledFunction())
CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
+ // Force to overwrite the callee name in case any non-call location was
+ // written before.
+ auto R = IRLocations.emplace(IRCallsite, CalleeName);
+ R.first->second = CalleeName;
+ assert((!FunctionSamples::ProfileIsProbeBased || R.second ||
+ R.first->second == CalleeName) &&
+ "Overwrite non-call or different callee name location for "
+ "pseudo probe callsite");
+
+ // Go through all the callsites on the IR and flag the callsite if the
+ // target name is the same as the one in the profile.
const auto CTM = FS.findCallTargetMapAt(IRCallsite);
const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite);
@@ -2105,55 +2332,54 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
}
}
- auto isInvalidLineOffset = [](uint32_t LineOffset) {
- return LineOffset & 0x8000;
- };
+ // Detect profile mismatch for profile staleness metrics report.
+ if (ReportProfileStaleness || PersistProfileStaleness) {
+ uint64_t FuncMismatchedCallsites = 0;
+ uint64_t FuncProfiledCallsites = 0;
+ countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites,
+ FuncProfiledCallsites);
+ TotalProfiledCallsites += FuncProfiledCallsites;
+ NumMismatchedCallsites += FuncMismatchedCallsites;
+ LLVM_DEBUG({
+ if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
+ FuncMismatchedCallsites)
+ dbgs() << "Function checksum is matched but there are "
+ << FuncMismatchedCallsites << "/" << FuncProfiledCallsites
+ << " mismatched callsites.\n";
+ });
+ }
- // Check if there are any callsites in the profile that does not match to any
- // IR callsites, those callsite samples will be discarded.
- for (auto &I : FS.getBodySamples()) {
- const LineLocation &Loc = I.first;
- if (isInvalidLineOffset(Loc.LineOffset))
- continue;
+ if (IsFuncHashMismatch && SalvageStaleProfile) {
+ LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
+ << "\n");
- uint64_t Count = I.second.getSamples();
- if (!I.second.getCallTargets().empty()) {
- TotalCallsiteSamples += Count;
- TotalProfiledCallsites++;
- if (!MatchedCallsiteLocs.count(Loc)) {
- MismatchedCallsiteSamples += Count;
- NumMismatchedCallsites++;
- }
- }
- }
+ StringMap<std::set<LineLocation>> CalleeToCallsitesMap;
+ populateProfileCallsites(FS, CalleeToCallsitesMap);
- for (auto &I : FS.getCallsiteSamples()) {
- const LineLocation &Loc = I.first;
- if (isInvalidLineOffset(Loc.LineOffset))
- continue;
+ // The matching result will be saved to IRToProfileLocationMap, create a new
+ // map for each function.
+ auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
- uint64_t Count = 0;
- for (auto &FM : I.second) {
- Count += FM.second.getHeadSamplesEstimate();
- }
- TotalCallsiteSamples += Count;
- TotalProfiledCallsites++;
- if (!MatchedCallsiteLocs.count(Loc)) {
- MismatchedCallsiteSamples += Count;
- NumMismatchedCallsites++;
- }
+ runStaleProfileMatching(IRLocations, CalleeToCallsitesMap,
+ IRToProfileLocationMap);
}
}
-void SampleProfileMatcher::detectProfileMismatch() {
+void SampleProfileMatcher::runOnModule() {
for (auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- FunctionSamples *FS = Reader.getSamplesFor(F);
+ FunctionSamples *FS = nullptr;
+ if (FlattenProfileForMatching)
+ FS = getFlattenedSamplesFor(F);
+ else
+ FS = Reader.getSamplesFor(F);
if (!FS)
continue;
- detectProfileMismatch(F, *FS);
+ runOnFunction(F, *FS);
}
+ if (SalvageStaleProfile)
+ distributeIRToProfileLocationMap();
if (ReportProfileStaleness) {
if (FunctionSamples::ProfileIsProbeBased) {
@@ -2196,8 +2422,31 @@ void SampleProfileMatcher::detectProfileMismatch() {
}
}
+void SampleProfileMatcher::distributeIRToProfileLocationMap(
+ FunctionSamples &FS) {
+ const auto ProfileMappings = FuncMappings.find(FS.getName());
+ if (ProfileMappings != FuncMappings.end()) {
+ FS.setIRToProfileLocationMap(&(ProfileMappings->second));
+ }
+
+ for (auto &Inlinees : FS.getCallsiteSamples()) {
+ for (auto FS : Inlinees.second) {
+ distributeIRToProfileLocationMap(FS.second);
+ }
+ }
+}
+
+// Use a central place to distribute the matching results. Outlined and inlined
+// profile with the function name will be set to the same pointer.
+void SampleProfileMatcher::distributeIRToProfileLocationMap() {
+ for (auto &I : Reader.getProfiles()) {
+ distributeIRToProfileLocationMap(I.second);
+ }
+}
+
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
- ProfileSummaryInfo *_PSI, CallGraph *CG) {
+ ProfileSummaryInfo *_PSI,
+ LazyCallGraph &CG) {
GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
PSI = _PSI;
@@ -2240,8 +2489,10 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
assert(SymbolMap.count(StringRef()) == 0 &&
"No empty StringRef should be added in SymbolMap");
- if (ReportProfileStaleness || PersistProfileStaleness)
- MatchingManager->detectProfileMismatch();
+ if (ReportProfileStaleness || PersistProfileStaleness ||
+ SalvageStaleProfile) {
+ MatchingManager->runOnModule();
+ }
bool retval = false;
for (auto *F : buildFunctionOrder(M, CG)) {
@@ -2327,6 +2578,11 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
return emitAnnotations(F);
return false;
}
+SampleProfileLoaderPass::SampleProfileLoaderPass(
+ std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
+ LTOPhase(LTOPhase), FS(std::move(FS)) {}
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ModuleAnalysisManager &AM) {
@@ -2343,18 +2599,21 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
return FAM.getResult<TargetLibraryAnalysis>(F);
};
+ if (!FS)
+ FS = vfs::getRealFileSystem();
+
SampleProfileLoader SampleLoader(
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
: ProfileRemappingFileName,
- LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
+ LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
if (!SampleLoader.doInitialization(M, &FAM))
return PreservedAnalyses::all();
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- CallGraph &CG = AM.getResult<CallGraphAnalysis>(M);
- if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
+ LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
+ if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
return PreservedAnalyses::all();
return PreservedAnalyses::none();