diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:03:47 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:04:23 +0000 | 
| commit | 7fa27ce4a07f19b07799a767fc29416f3b625afb (patch) | |
| tree | 27825c83636c4de341eb09a74f49f5d38a15d165 /llvm/lib/Transforms/IPO/SampleProfile.cpp | |
| parent | e3b557809604d036af6e00c60f012c2025b59a5e (diff) | |
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
| -rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfile.cpp | 569 | 
1 files changed, 414 insertions, 155 deletions
| diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 93b368fd72a6..a53baecd4776 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -35,9 +35,9 @@  #include "llvm/ADT/Twine.h"  #include "llvm/Analysis/AssumptionCache.h"  #include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/CallGraph.h"  #include "llvm/Analysis/InlineAdvisor.h"  #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/LazyCallGraph.h"  #include "llvm/Analysis/OptimizationRemarkEmitter.h"  #include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/Analysis/ReplayInlineAdvisor.h" @@ -58,8 +58,6 @@  #include "llvm/IR/PassManager.h"  #include "llvm/IR/PseudoProbe.h"  #include "llvm/IR/ValueSymbolTable.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h"  #include "llvm/ProfileData/InstrProf.h"  #include "llvm/ProfileData/SampleProf.h"  #include "llvm/ProfileData/SampleProfReader.h" @@ -67,6 +65,7 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/VirtualFileSystem.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/IPO.h"  #include "llvm/Transforms/IPO/ProfiledCallGraph.h" @@ -129,6 +128,11 @@ static cl::opt<std::string> SampleProfileRemappingFile(      "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),      cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden); +static cl::opt<bool> SalvageStaleProfile( +    "salvage-stale-profile", cl::Hidden, cl::init(false), +    cl::desc("Salvage stale profile by fuzzy matching and use the remapped " +             "location for sample profile query.")); +  static cl::opt<bool> ReportProfileStaleness(      "report-profile-staleness", cl::Hidden, cl::init(false),      cl::desc("Compute and report stale profile statistical metrics.")); @@ -138,6 +142,11 @@ static cl::opt<bool> PersistProfileStaleness(      cl::desc("Compute stale profile statistical metrics and write it into the "               "native object file(.llvm_stats section).")); +static cl::opt<bool> FlattenProfileForMatching( +    "flatten-profile-for-matching", cl::Hidden, cl::init(true), +    cl::desc( +        "Use flattened profile for stale profile detection and matching.")); +  static cl::opt<bool> ProfileSampleAccurate(      "profile-sample-accurate", cl::Hidden, cl::init(false),      cl::desc("If the sample profile is accurate, we will mark all un-sampled " @@ -173,9 +182,6 @@ static cl::opt<bool>                           cl::desc("Process functions in a top-down order "                                    "defined by the profiled call graph when "                                    "-sample-profile-top-down-load is on.")); -cl::opt<bool> -    SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, -                    cl::desc("Sort profiled recursion by edge weights."));  static cl::opt<bool> ProfileSizeInline(      "sample-profile-inline-size", cl::Hidden, cl::init(false), @@ -191,6 +197,11 @@ static cl::opt<bool> DisableSampleLoaderInlining(               "pass, and merge (or scale) profiles (as configured by "               "--sample-profile-merge-inlinee).")); +namespace llvm { +cl::opt<bool> +    SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, +                    cl::desc("Sort profiled recursion by edge weights.")); +  cl::opt<int> ProfileInlineGrowthLimit(      "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),      cl::desc("The size growth ratio limit for proirity-based sample profile " @@ -214,6 +225,7 @@ cl::opt<int> SampleHotCallSiteThreshold(  cl::opt<int> SampleColdCallSiteThreshold(      "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),      cl::desc("Threshold for inlining cold callsites")); +} // namespace llvm  static cl::opt<unsigned> ProfileICPRelativeHotness(      "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), @@ -307,7 +319,9 @@ static cl::opt<bool> AnnotateSampleProfileInlinePhase(      cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "               "sample-profile inline pass name.")); +namespace llvm {  extern cl::opt<bool> EnableExtTspBlockPlacement; +}  namespace { @@ -428,6 +442,11 @@ class SampleProfileMatcher {    Module &M;    SampleProfileReader &Reader;    const PseudoProbeManager *ProbeManager; +  SampleProfileMap FlattenedProfiles; +  // For each function, the matcher generates a map, of which each entry is a +  // mapping from the source location of current build to the source location in +  // the profile. +  StringMap<LocToLocMap> FuncMappings;    // Profile mismatching statstics.    uint64_t TotalProfiledCallsites = 0; @@ -442,9 +461,43 @@ class SampleProfileMatcher {  public:    SampleProfileMatcher(Module &M, SampleProfileReader &Reader,                         const PseudoProbeManager *ProbeManager) -      : M(M), Reader(Reader), ProbeManager(ProbeManager) {} -  void detectProfileMismatch(); -  void detectProfileMismatch(const Function &F, const FunctionSamples &FS); +      : M(M), Reader(Reader), ProbeManager(ProbeManager) { +    if (FlattenProfileForMatching) { +      ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, +                                       FunctionSamples::ProfileIsCS); +    } +  } +  void runOnModule(); + +private: +  FunctionSamples *getFlattenedSamplesFor(const Function &F) { +    StringRef CanonFName = FunctionSamples::getCanonicalFnName(F); +    auto It = FlattenedProfiles.find(CanonFName); +    if (It != FlattenedProfiles.end()) +      return &It->second; +    return nullptr; +  } +  void runOnFunction(const Function &F, const FunctionSamples &FS); +  void countProfileMismatches( +      const FunctionSamples &FS, +      const std::unordered_set<LineLocation, LineLocationHash> +          &MatchedCallsiteLocs, +      uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites); + +  LocToLocMap &getIRToProfileLocationMap(const Function &F) { +    auto Ret = FuncMappings.try_emplace( +        FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap()); +    return Ret.first->second; +  } +  void distributeIRToProfileLocationMap(); +  void distributeIRToProfileLocationMap(FunctionSamples &FS); +  void populateProfileCallsites( +      const FunctionSamples &FS, +      StringMap<std::set<LineLocation>> &CalleeToCallsitesMap); +  void runStaleProfileMatching( +      const std::map<LineLocation, StringRef> &IRLocations, +      StringMap<std::set<LineLocation>> &CalleeToCallsitesMap, +      LocToLocMap &IRToProfileLocationMap);  };  /// Sample profile pass. @@ -452,15 +505,16 @@ public:  /// This pass reads profile data from the file specified by  /// -sample-profile-file and annotates every affected function with the  /// profile information found in that file. -class SampleProfileLoader final -    : public SampleProfileLoaderBaseImpl<BasicBlock> { +class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {  public:    SampleProfileLoader(        StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase, +      IntrusiveRefCntPtr<vfs::FileSystem> FS,        std::function<AssumptionCache &(Function &)> GetAssumptionCache,        std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,        std::function<const TargetLibraryInfo &(Function &)> GetTLI) -      : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)), +      : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName), +                                    std::move(FS)),          GetAC(std::move(GetAssumptionCache)),          GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),          LTOPhase(LTOPhase), @@ -471,13 +525,12 @@ public:    bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);    bool runOnModule(Module &M, ModuleAnalysisManager *AM, -                   ProfileSummaryInfo *_PSI, CallGraph *CG); +                   ProfileSummaryInfo *_PSI, LazyCallGraph &CG);  protected:    bool runOnFunction(Function &F, ModuleAnalysisManager *AM);    bool emitAnnotations(Function &F);    ErrorOr<uint64_t> getInstWeight(const Instruction &I) override; -  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);    const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;    const FunctionSamples *    findFunctionSamples(const Instruction &I) const override; @@ -512,8 +565,8 @@ protected:    void promoteMergeNotInlinedContextSamples(        MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,        const Function &F); -  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG); -  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG); +  std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG); +  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);    void generateMDProfMetadata(Function &F);    /// Map from function name to Function *. Used to find the function from @@ -573,9 +626,6 @@ protected:    // External inline advisor used to replay inline decision from remarks.    std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor; -  // A pseudo probe helper to correlate the imported sample counts. -  std::unique_ptr<PseudoProbeManager> ProbeManager; -    // A helper to implement the sample profile matching algorithm.    std::unique_ptr<SampleProfileMatcher> MatchingManager; @@ -586,6 +636,50 @@ private:  };  } // end anonymous namespace +namespace llvm { +template <> +inline bool SampleProfileInference<Function>::isExit(const BasicBlock *BB) { +  return succ_empty(BB); +} + +template <> +inline void SampleProfileInference<Function>::findUnlikelyJumps( +    const std::vector<const BasicBlockT *> &BasicBlocks, +    BlockEdgeMap &Successors, FlowFunction &Func) { +  for (auto &Jump : Func.Jumps) { +    const auto *BB = BasicBlocks[Jump.Source]; +    const auto *Succ = BasicBlocks[Jump.Target]; +    const Instruction *TI = BB->getTerminator(); +    // Check if a block ends with InvokeInst and mark non-taken branch unlikely. +    // In that case block Succ should be a landing pad +    if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) { +      if (isa<InvokeInst>(TI)) { +        Jump.IsUnlikely = true; +      } +    } +    const Instruction *SuccTI = Succ->getTerminator(); +    // Check if the target block contains UnreachableInst and mark it unlikely +    if (SuccTI->getNumSuccessors() == 0) { +      if (isa<UnreachableInst>(SuccTI)) { +        Jump.IsUnlikely = true; +      } +    } +  } +} + +template <> +void SampleProfileLoaderBaseImpl<Function>::computeDominanceAndLoopInfo( +    Function &F) { +  DT.reset(new DominatorTree); +  DT->recalculate(F); + +  PDT.reset(new PostDominatorTree(F)); + +  LI.reset(new LoopInfo); +  LI->analyze(*DT); +} +} // namespace llvm +  ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {    if (FunctionSamples::ProfileIsProbeBased)      return getProbeWeight(Inst); @@ -614,68 +708,6 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {    return getInstWeightImpl(Inst);  } -// Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight -// of non-probe instruction. So if all instructions of the BB give error_code, -// tell the inference algorithm to infer the BB weight. -ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) { -  assert(FunctionSamples::ProfileIsProbeBased && -         "Profile is not pseudo probe based"); -  std::optional<PseudoProbe> Probe = extractProbe(Inst); -  // Ignore the non-probe instruction. If none of the instruction in the BB is -  // probe, we choose to infer the BB's weight. -  if (!Probe) -    return std::error_code(); - -  const FunctionSamples *FS = findFunctionSamples(Inst); -  // If none of the instruction has FunctionSample, we choose to return zero -  // value sample to indicate the BB is cold. This could happen when the -  // instruction is from inlinee and no profile data is found. -  // FIXME: This should not be affected by the source drift issue as 1) if the -  // newly added function is top-level inliner, it won't match the CFG checksum -  // in the function profile or 2) if it's the inlinee, the inlinee should have -  // a profile, otherwise it wouldn't be inlined. For non-probe based profile, -  // we can improve it by adding a switch for profile-sample-block-accurate for -  // block level counts in the future. -  if (!FS) -    return 0; - -  // For non-CS profile, If a direct call/invoke instruction is inlined in -  // profile (findCalleeFunctionSamples returns non-empty result), but not -  // inlined here, it means that the inlined callsite has no sample, thus the -  // call instruction should have 0 count. -  // For CS profile, the callsite count of previously inlined callees is -  // populated with the entry count of the callees. -  if (!FunctionSamples::ProfileIsCS) -    if (const auto *CB = dyn_cast<CallBase>(&Inst)) -      if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) -        return 0; - -  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0); -  if (R) { -    uint64_t Samples = R.get() * Probe->Factor; -    bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples); -    if (FirstMark) { -      ORE->emit([&]() { -        OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst); -        Remark << "Applied " << ore::NV("NumSamples", Samples); -        Remark << " samples from profile (ProbeId="; -        Remark << ore::NV("ProbeId", Probe->Id); -        Remark << ", Factor="; -        Remark << ore::NV("Factor", Probe->Factor); -        Remark << ", OriginalSamples="; -        Remark << ore::NV("OriginalSamples", R.get()); -        Remark << ")"; -        return Remark; -      }); -    } -    LLVM_DEBUG(dbgs() << "    " << Probe->Id << ":" << Inst -                      << " - weight: " << R.get() << " - factor: " -                      << format("%0.2f", Probe->Factor) << ")\n"); -    return Samples; -  } -  return R; -} -  /// Get the FunctionSamples for a call instruction.  ///  /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined @@ -1041,8 +1073,8 @@ void SampleProfileLoader::findExternalInlineCandidate(      DenseSet<GlobalValue::GUID> &InlinedGUIDs,      const StringMap<Function *> &SymbolMap, uint64_t Threshold) { -  // If ExternalInlineAdvisor wants to inline an external function -  // make sure it's imported +  // If ExternalInlineAdvisor(ReplayInlineAdvisor) wants to inline an external +  // function make sure it's imported    if (CB && getExternalInlineAdvisorShouldInline(*CB)) {      // Samples may not exist for replayed function, if so      // just add the direct GUID and move on @@ -1055,7 +1087,13 @@ void SampleProfileLoader::findExternalInlineCandidate(      Threshold = 0;    } -  assert(Samples && "expect non-null caller profile"); +  // In some rare cases, call instruction could be changed after being pushed +  // into inline candidate queue, this is because earlier inlining may expose +  // constant propagation which can change indirect call to direct call. When +  // this happens, we may fail to find matching function samples for the +  // candidate later, even if a match was found when the candidate was enqueued. +  if (!Samples) +    return;    // For AutoFDO profile, retrieve candidate profiles by walking over    // the nested inlinee profiles. @@ -1255,7 +1293,7 @@ bool SampleProfileLoader::tryInlineCandidate(    if (!Cost)      return false; -  InlineFunctionInfo IFI(nullptr, GetAC); +  InlineFunctionInfo IFI(GetAC);    IFI.UpdateProfile = false;    InlineResult IR = InlineFunction(CB, IFI,                                     /*MergeAttributes=*/true); @@ -1784,9 +1822,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {      if (!ProbeManager->profileIsValid(F, *Samples)) {        LLVM_DEBUG(            dbgs() << "Profile is invalid due to CFG mismatch for Function " -                 << F.getName()); +                 << F.getName() << "\n");        ++NumMismatchedProfile; -      return false; +      if (!SalvageStaleProfile) +        return false;      }      ++NumMatchedProfile;    } else { @@ -1813,7 +1852,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {  }  std::unique_ptr<ProfiledCallGraph> -SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { +SampleProfileLoader::buildProfiledCallGraph(Module &M) {    std::unique_ptr<ProfiledCallGraph> ProfiledCG;    if (FunctionSamples::ProfileIsCS)      ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker); @@ -1823,18 +1862,17 @@ SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {    // Add all functions into the profiled call graph even if they are not in    // the profile. This makes sure functions missing from the profile still    // gets a chance to be processed. -  for (auto &Node : CG) { -    const auto *F = Node.first; -    if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile")) +  for (Function &F : M) { +    if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))        continue; -    ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F)); +    ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(F));    }    return ProfiledCG;  }  std::vector<Function *> -SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { +SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {    std::vector<Function *> FunctionOrderList;    FunctionOrderList.reserve(M.size()); @@ -1842,7 +1880,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {      errs() << "WARNING: -use-profiled-call-graph ignored, should be used "                "together with -sample-profile-top-down-load.\n"; -  if (!ProfileTopDownLoad || CG == nullptr) { +  if (!ProfileTopDownLoad) {      if (ProfileMergeInlinee) {        // Disable ProfileMergeInlinee if profile is not loaded in top down order,        // because the profile for a function may be used for the profile @@ -1858,8 +1896,6 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {      return FunctionOrderList;    } -  assert(&CG->getModule() == &M); -    if (UseProfiledCallGraph || (FunctionSamples::ProfileIsCS &&                                 !UseProfiledCallGraph.getNumOccurrences())) {      // Use profiled call edges to augment the top-down order. There are cases @@ -1910,7 +1946,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {      // static call edges are not so important when they don't correspond to a      // context in the profile. -    std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG); +    std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);      scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());      while (!CGI.isAtEnd()) {        auto Range = *CGI; @@ -1927,25 +1963,27 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {        ++CGI;      }    } else { -    scc_iterator<CallGraph *> CGI = scc_begin(CG); -    while (!CGI.isAtEnd()) { -      for (CallGraphNode *Node : *CGI) { -        auto *F = Node->getFunction(); -        if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) -          FunctionOrderList.push_back(F); +    CG.buildRefSCCs(); +    for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { +      for (LazyCallGraph::SCC &C : RC) { +        for (LazyCallGraph::Node &N : C) { +          Function &F = N.getFunction(); +          if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile")) +            FunctionOrderList.push_back(&F); +        }        } -      ++CGI;      }    } +  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); +    LLVM_DEBUG({      dbgs() << "Function processing order:\n"; -    for (auto F : reverse(FunctionOrderList)) { +    for (auto F : FunctionOrderList) {        dbgs() << F->getName() << "\n";      }    }); -  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());    return FunctionOrderList;  } @@ -1954,7 +1992,7 @@ bool SampleProfileLoader::doInitialization(Module &M,    auto &Ctx = M.getContext();    auto ReaderOrErr = SampleProfileReader::create( -      Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename); +      Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);    if (std::error_code EC = ReaderOrErr.getError()) {      std::string Msg = "Could not open profile: " + EC.message();      Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); @@ -2016,6 +2054,16 @@ bool SampleProfileLoader::doInitialization(Module &M,          UsePreInlinerDecision = true;      } +    // Enable stale profile matching by default for probe-based profile. +    // Currently the matching relies on if the checksum mismatch is detected, +    // which is currently only available for pseudo-probe mode. Removing the +    // checksum check could cause regressions for some cases, so further tuning +    // might be needed if we want to enable it for all cases. +    if (Reader->profileIsProbeBased() && +        !SalvageStaleProfile.getNumOccurrences()) { +      SalvageStaleProfile = true; +    } +      if (!Reader->profileIsCS()) {        // Non-CS profile should be fine without a function size budget for the        // inliner since the contexts in the profile are either all from inlining @@ -2046,7 +2094,8 @@ bool SampleProfileLoader::doInitialization(Module &M,      }    } -  if (ReportProfileStaleness || PersistProfileStaleness) { +  if (ReportProfileStaleness || PersistProfileStaleness || +      SalvageStaleProfile) {      MatchingManager =          std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());    } @@ -2054,8 +2103,167 @@ bool SampleProfileLoader::doInitialization(Module &M,    return true;  } -void SampleProfileMatcher::detectProfileMismatch(const Function &F, -                                                 const FunctionSamples &FS) { +void SampleProfileMatcher::countProfileMismatches( +    const FunctionSamples &FS, +    const std::unordered_set<LineLocation, LineLocationHash> +        &MatchedCallsiteLocs, +    uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) { + +  auto isInvalidLineOffset = [](uint32_t LineOffset) { +    return LineOffset & 0x8000; +  }; + +  // Check if there are any callsites in the profile that does not match to any +  // IR callsites, those callsite samples will be discarded. +  for (auto &I : FS.getBodySamples()) { +    const LineLocation &Loc = I.first; +    if (isInvalidLineOffset(Loc.LineOffset)) +      continue; + +    uint64_t Count = I.second.getSamples(); +    if (!I.second.getCallTargets().empty()) { +      TotalCallsiteSamples += Count; +      FuncProfiledCallsites++; +      if (!MatchedCallsiteLocs.count(Loc)) { +        MismatchedCallsiteSamples += Count; +        FuncMismatchedCallsites++; +      } +    } +  } + +  for (auto &I : FS.getCallsiteSamples()) { +    const LineLocation &Loc = I.first; +    if (isInvalidLineOffset(Loc.LineOffset)) +      continue; + +    uint64_t Count = 0; +    for (auto &FM : I.second) { +      Count += FM.second.getHeadSamplesEstimate(); +    } +    TotalCallsiteSamples += Count; +    FuncProfiledCallsites++; +    if (!MatchedCallsiteLocs.count(Loc)) { +      MismatchedCallsiteSamples += Count; +      FuncMismatchedCallsites++; +    } +  } +} + +// Populate the anchors(direct callee name) from profile. +void SampleProfileMatcher::populateProfileCallsites( +    const FunctionSamples &FS, +    StringMap<std::set<LineLocation>> &CalleeToCallsitesMap) { +  for (const auto &I : FS.getBodySamples()) { +    const auto &Loc = I.first; +    const auto &CTM = I.second.getCallTargets(); +    // Filter out possible indirect calls, use direct callee name as anchor. +    if (CTM.size() == 1) { +      StringRef CalleeName = CTM.begin()->first(); +      const auto &Candidates = CalleeToCallsitesMap.try_emplace( +          CalleeName, std::set<LineLocation>()); +      Candidates.first->second.insert(Loc); +    } +  } + +  for (const auto &I : FS.getCallsiteSamples()) { +    const LineLocation &Loc = I.first; +    const auto &CalleeMap = I.second; +    // Filter out possible indirect calls, use direct callee name as anchor. +    if (CalleeMap.size() == 1) { +      StringRef CalleeName = CalleeMap.begin()->first; +      const auto &Candidates = CalleeToCallsitesMap.try_emplace( +          CalleeName, std::set<LineLocation>()); +      Candidates.first->second.insert(Loc); +    } +  } +} + +// Call target name anchor based profile fuzzy matching. +// Input: +// For IR locations, the anchor is the callee name of direct callsite; For +// profile locations, it's the call target name for BodySamples or inlinee's +// profile name for CallsiteSamples. +// Matching heuristic: +// First match all the anchors in lexical order, then split the non-anchor +// locations between the two anchors evenly, first half are matched based on the +// start anchor, second half are matched based on the end anchor. +// For example, given: +// IR locations:      [1, 2(foo), 3, 5, 6(bar), 7] +// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] +// The matching gives: +//   [1,    2(foo), 3,  5,  6(bar), 7] +//    |     |       |   |     |     | +//   [1, 2, 3(foo), 4,  7,  8(bar), 9] +// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. +void SampleProfileMatcher::runStaleProfileMatching( +    const std::map<LineLocation, StringRef> &IRLocations, +    StringMap<std::set<LineLocation>> &CalleeToCallsitesMap, +    LocToLocMap &IRToProfileLocationMap) { +  assert(IRToProfileLocationMap.empty() && +         "Run stale profile matching only once per function"); + +  auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { +    // Skip the unchanged location mapping to save memory. +    if (From != To) +      IRToProfileLocationMap.insert({From, To}); +  }; + +  // Use function's beginning location as the initial anchor. +  int32_t LocationDelta = 0; +  SmallVector<LineLocation> LastMatchedNonAnchors; + +  for (const auto &IR : IRLocations) { +    const auto &Loc = IR.first; +    StringRef CalleeName = IR.second; +    bool IsMatchedAnchor = false; +    // Match the anchor location in lexical order. +    if (!CalleeName.empty()) { +      auto ProfileAnchors = CalleeToCallsitesMap.find(CalleeName); +      if (ProfileAnchors != CalleeToCallsitesMap.end() && +          !ProfileAnchors->second.empty()) { +        auto CI = ProfileAnchors->second.begin(); +        const auto Candidate = *CI; +        ProfileAnchors->second.erase(CI); +        InsertMatching(Loc, Candidate); +        LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName +                          << " is matched from " << Loc << " to " << Candidate +                          << "\n"); +        LocationDelta = Candidate.LineOffset - Loc.LineOffset; + +        // Match backwards for non-anchor locations. +        // The locations in LastMatchedNonAnchors have been matched forwards +        // based on the previous anchor, spilt it evenly and overwrite the +        // second half based on the current anchor. +        for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; +             I < LastMatchedNonAnchors.size(); I++) { +          const auto &L = LastMatchedNonAnchors[I]; +          uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; +          LineLocation Candidate(CandidateLineOffset, L.Discriminator); +          InsertMatching(L, Candidate); +          LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L +                            << " to " << Candidate << "\n"); +        } + +        IsMatchedAnchor = true; +        LastMatchedNonAnchors.clear(); +      } +    } + +    // Match forwards for non-anchor locations. +    if (!IsMatchedAnchor) { +      uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; +      LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); +      InsertMatching(Loc, Candidate); +      LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " +                        << Candidate << "\n"); +      LastMatchedNonAnchors.emplace_back(Loc); +    } +  } +} + +void SampleProfileMatcher::runOnFunction(const Function &F, +                                         const FunctionSamples &FS) { +  bool IsFuncHashMismatch = false;    if (FunctionSamples::ProfileIsProbeBased) {      uint64_t Count = FS.getTotalSamples();      TotalFuncHashSamples += Count; @@ -2063,16 +2271,24 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,      if (!ProbeManager->profileIsValid(F, FS)) {        MismatchedFuncHashSamples += Count;        NumMismatchedFuncHash++; -      return; +      IsFuncHashMismatch = true;      }    }    std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs; +  // The value of the map is the name of direct callsite and use empty StringRef +  // for non-direct-call site. +  std::map<LineLocation, StringRef> IRLocations; -  // Go through all the callsites on the IR and flag the callsite if the target -  // name is the same as the one in the profile. +  // Extract profile matching anchors and profile mismatch metrics in the IR.    for (auto &BB : F) {      for (auto &I : BB) { +      // TODO: Support line-number based location(AutoFDO). +      if (FunctionSamples::ProfileIsProbeBased && isa<PseudoProbeInst>(&I)) { +        if (std::optional<PseudoProbe> Probe = extractProbe(I)) +          IRLocations.emplace(LineLocation(Probe->Id, 0), StringRef()); +      } +        if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))          continue; @@ -2084,6 +2300,17 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,          if (Function *Callee = CB->getCalledFunction())            CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); +        // Force to overwrite the callee name in case any non-call location was +        // written before. +        auto R = IRLocations.emplace(IRCallsite, CalleeName); +        R.first->second = CalleeName; +        assert((!FunctionSamples::ProfileIsProbeBased || R.second || +                R.first->second == CalleeName) && +               "Overwrite non-call or different callee name location for " +               "pseudo probe callsite"); + +        // Go through all the callsites on the IR and flag the callsite if the +        // target name is the same as the one in the profile.          const auto CTM = FS.findCallTargetMapAt(IRCallsite);          const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite); @@ -2105,55 +2332,54 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,      }    } -  auto isInvalidLineOffset = [](uint32_t LineOffset) { -    return LineOffset & 0x8000; -  }; +  // Detect profile mismatch for profile staleness metrics report. +  if (ReportProfileStaleness || PersistProfileStaleness) { +    uint64_t FuncMismatchedCallsites = 0; +    uint64_t FuncProfiledCallsites = 0; +    countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites, +                           FuncProfiledCallsites); +    TotalProfiledCallsites += FuncProfiledCallsites; +    NumMismatchedCallsites += FuncMismatchedCallsites; +    LLVM_DEBUG({ +      if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch && +          FuncMismatchedCallsites) +        dbgs() << "Function checksum is matched but there are " +               << FuncMismatchedCallsites << "/" << FuncProfiledCallsites +               << " mismatched callsites.\n"; +    }); +  } -  // Check if there are any callsites in the profile that does not match to any -  // IR callsites, those callsite samples will be discarded. -  for (auto &I : FS.getBodySamples()) { -    const LineLocation &Loc = I.first; -    if (isInvalidLineOffset(Loc.LineOffset)) -      continue; +  if (IsFuncHashMismatch && SalvageStaleProfile) { +    LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() +                      << "\n"); -    uint64_t Count = I.second.getSamples(); -    if (!I.second.getCallTargets().empty()) { -      TotalCallsiteSamples += Count; -      TotalProfiledCallsites++; -      if (!MatchedCallsiteLocs.count(Loc)) { -        MismatchedCallsiteSamples += Count; -        NumMismatchedCallsites++; -      } -    } -  } +    StringMap<std::set<LineLocation>> CalleeToCallsitesMap; +    populateProfileCallsites(FS, CalleeToCallsitesMap); -  for (auto &I : FS.getCallsiteSamples()) { -    const LineLocation &Loc = I.first; -    if (isInvalidLineOffset(Loc.LineOffset)) -      continue; +    // The matching result will be saved to IRToProfileLocationMap, create a new +    // map for each function. +    auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); -    uint64_t Count = 0; -    for (auto &FM : I.second) { -      Count += FM.second.getHeadSamplesEstimate(); -    } -    TotalCallsiteSamples += Count; -    TotalProfiledCallsites++; -    if (!MatchedCallsiteLocs.count(Loc)) { -      MismatchedCallsiteSamples += Count; -      NumMismatchedCallsites++; -    } +    runStaleProfileMatching(IRLocations, CalleeToCallsitesMap, +                            IRToProfileLocationMap);    }  } -void SampleProfileMatcher::detectProfileMismatch() { +void SampleProfileMatcher::runOnModule() {    for (auto &F : M) {      if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))        continue; -    FunctionSamples *FS = Reader.getSamplesFor(F); +    FunctionSamples *FS = nullptr; +    if (FlattenProfileForMatching) +      FS = getFlattenedSamplesFor(F); +    else +      FS = Reader.getSamplesFor(F);      if (!FS)        continue; -    detectProfileMismatch(F, *FS); +    runOnFunction(F, *FS);    } +  if (SalvageStaleProfile) +    distributeIRToProfileLocationMap();    if (ReportProfileStaleness) {      if (FunctionSamples::ProfileIsProbeBased) { @@ -2196,8 +2422,31 @@ void SampleProfileMatcher::detectProfileMismatch() {    }  } +void SampleProfileMatcher::distributeIRToProfileLocationMap( +    FunctionSamples &FS) { +  const auto ProfileMappings = FuncMappings.find(FS.getName()); +  if (ProfileMappings != FuncMappings.end()) { +    FS.setIRToProfileLocationMap(&(ProfileMappings->second)); +  } + +  for (auto &Inlinees : FS.getCallsiteSamples()) { +    for (auto FS : Inlinees.second) { +      distributeIRToProfileLocationMap(FS.second); +    } +  } +} + +// Use a central place to distribute the matching results. Outlined and inlined +// profile with the function name will be set to the same pointer. +void SampleProfileMatcher::distributeIRToProfileLocationMap() { +  for (auto &I : Reader.getProfiles()) { +    distributeIRToProfileLocationMap(I.second); +  } +} +  bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, -                                      ProfileSummaryInfo *_PSI, CallGraph *CG) { +                                      ProfileSummaryInfo *_PSI, +                                      LazyCallGraph &CG) {    GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);    PSI = _PSI; @@ -2240,8 +2489,10 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,    assert(SymbolMap.count(StringRef()) == 0 &&           "No empty StringRef should be added in SymbolMap"); -  if (ReportProfileStaleness || PersistProfileStaleness) -    MatchingManager->detectProfileMismatch(); +  if (ReportProfileStaleness || PersistProfileStaleness || +      SalvageStaleProfile) { +    MatchingManager->runOnModule(); +  }    bool retval = false;    for (auto *F : buildFunctionOrder(M, CG)) { @@ -2327,6 +2578,11 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)      return emitAnnotations(F);    return false;  } +SampleProfileLoaderPass::SampleProfileLoaderPass( +    std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase, +    IntrusiveRefCntPtr<vfs::FileSystem> FS) +    : ProfileFileName(File), ProfileRemappingFileName(RemappingFile), +      LTOPhase(LTOPhase), FS(std::move(FS)) {}  PreservedAnalyses SampleProfileLoaderPass::run(Module &M,                                                 ModuleAnalysisManager &AM) { @@ -2343,18 +2599,21 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,      return FAM.getResult<TargetLibraryAnalysis>(F);    }; +  if (!FS) +    FS = vfs::getRealFileSystem(); +    SampleProfileLoader SampleLoader(        ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,        ProfileRemappingFileName.empty() ? SampleProfileRemappingFile                                         : ProfileRemappingFileName, -      LTOPhase, GetAssumptionCache, GetTTI, GetTLI); +      LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);    if (!SampleLoader.doInitialization(M, &FAM))      return PreservedAnalyses::all();    ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); -  CallGraph &CG = AM.getResult<CallGraphAnalysis>(M); -  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG)) +  LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M); +  if (!SampleLoader.runOnModule(M, &AM, PSI, CG))      return PreservedAnalyses::all();    return PreservedAnalyses::none(); | 
