diff options
Diffstat (limited to 'lib/Transforms/IPO/SampleProfile.cpp')
| -rw-r--r-- | lib/Transforms/IPO/SampleProfile.cpp | 380 | 
1 files changed, 253 insertions, 127 deletions
| diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 6baada2c1ae1..f0e781b9d923 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -23,39 +23,65 @@  //===----------------------------------------------------------------------===//  #include "llvm/Transforms/SampleProfile.h" +#include "llvm/ADT/ArrayRef.h"  #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h"  #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h"  #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InlineCost.h"  #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h"  #include "llvm/IR/DiagnosticInfo.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h"  #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h"  #include "llvm/IR/ValueSymbolTable.h"  #include "llvm/Pass.h"  #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h"  #include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/Format.h" +#include "llvm/Support/GenericDomTree.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/IPO.h"  #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h"  #include "llvm/Transforms/Utils/Cloning.h" -#include <cctype> +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <limits> +#include <map> +#include <memory> +#include <string> +#include <system_error> +#include <utility> +#include <vector>  using namespace llvm;  using namespace sampleprof; @@ -67,34 +93,39 @@ using namespace sampleprof;  static cl::opt<std::string> SampleProfileFile(      "sample-profile-file", cl::init(""), cl::value_desc("filename"),      cl::desc("Profile file loaded by -sample-profile"), cl::Hidden); +  static cl::opt<unsigned> SampleProfileMaxPropagateIterations(      "sample-profile-max-propagate-iterations", cl::init(100),      cl::desc("Maximum number of iterations to go through when propagating "               "sample block/edge weights through the CFG.")); +  static cl::opt<unsigned> SampleProfileRecordCoverage(      "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),      cl::desc("Emit a warning if less than N% of records in the input profile "               "are matched to the IR.")); +  static cl::opt<unsigned> SampleProfileSampleCoverage(      "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),      cl::desc("Emit a warning if less than N% of samples in the input profile "               "are matched to the IR.")); +  static cl::opt<double> SampleProfileHotThreshold(      "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),      cl::desc("Inlined functions that account for more than N% of all samples "               "collected in the parent function, will be inlined again."));  namespace { -typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap; -typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap; -typedef std::pair<const BasicBlock *, const BasicBlock *> Edge; -typedef DenseMap<Edge, uint64_t> EdgeWeightMap; -typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>> -    BlockEdgeMap; + +using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>; +using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>; +using Edge = std::pair<const BasicBlock *, const BasicBlock *>; +using EdgeWeightMap = DenseMap<Edge, uint64_t>; +using BlockEdgeMap = +    DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>;  class SampleCoverageTracker {  public: -  SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {} +  SampleCoverageTracker() = default;    bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,                         uint32_t Discriminator, uint64_t Samples); @@ -103,15 +134,16 @@ public:    unsigned countBodyRecords(const FunctionSamples *FS) const;    uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }    uint64_t countBodySamples(const FunctionSamples *FS) const; +    void clear() {      SampleCoverage.clear();      TotalUsedSamples = 0;    }  private: -  typedef std::map<LineLocation, unsigned> BodySampleCoverageMap; -  typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap> -      FunctionSamplesCoverageMap; +  using BodySampleCoverageMap = std::map<LineLocation, unsigned>; +  using FunctionSamplesCoverageMap = +      DenseMap<const FunctionSamples *, BodySampleCoverageMap>;    /// Coverage map for sampling records.    /// @@ -135,7 +167,7 @@ private:    /// and all the inlined callsites. Strictly, we should have a map of counters    /// keyed by FunctionSamples pointers, but these stats are cleared after    /// every function, so we just need to keep a single counter. -  uint64_t TotalUsedSamples; +  uint64_t TotalUsedSamples = 0;  };  /// \brief Sample profile pass. @@ -145,29 +177,31 @@ private:  /// profile information found in that file.  class SampleProfileLoader {  public: -  SampleProfileLoader(StringRef Name = SampleProfileFile) -      : DT(nullptr), PDT(nullptr), LI(nullptr), ACT(nullptr), Reader(), -        Samples(nullptr), Filename(Name), ProfileIsValid(false), -        TotalCollectedSamples(0) {} +  SampleProfileLoader( +      StringRef Name, bool IsThinLTOPreLink, +      std::function<AssumptionCache &(Function &)> GetAssumptionCache, +      std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo) +      : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo), +        Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {}    bool doInitialization(Module &M); -  bool runOnModule(Module &M); -  void setACT(AssumptionCacheTracker *A) { ACT = A; } +  bool runOnModule(Module &M, ModuleAnalysisManager *AM);    void dump() { Reader->dump(); }  protected: -  bool runOnFunction(Function &F); +  bool runOnFunction(Function &F, ModuleAnalysisManager *AM);    unsigned getFunctionLoc(Function &F);    bool emitAnnotations(Function &F);    ErrorOr<uint64_t> getInstWeight(const Instruction &I);    ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);    const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;    std::vector<const FunctionSamples *> -  findIndirectCallFunctionSamples(const Instruction &I) const; +  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;    const FunctionSamples *findFunctionSamples(const Instruction &I) const; +  bool inlineCallInstruction(Instruction *I);    bool inlineHotFunctions(Function &F, -                          DenseSet<GlobalValue::GUID> &ImportGUIDs); +                          DenseSet<GlobalValue::GUID> &InlinedGUIDs);    void printEdgeWeight(raw_ostream &OS, Edge E);    void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;    void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -222,7 +256,8 @@ protected:    std::unique_ptr<PostDomTreeBase<BasicBlock>> PDT;    std::unique_ptr<LoopInfo> LI; -  AssumptionCacheTracker *ACT; +  std::function<AssumptionCache &(Function &)> GetAC; +  std::function<TargetTransformInfo &(Function &)> GetTTI;    /// \brief Predecessors for each basic block in the CFG.    BlockEdgeMap Predecessors; @@ -236,19 +271,28 @@ protected:    std::unique_ptr<SampleProfileReader> Reader;    /// \brief Samples collected for the body of this function. -  FunctionSamples *Samples; +  FunctionSamples *Samples = nullptr;    /// \brief Name of the profile file to load.    std::string Filename;    /// \brief Flag indicating whether the profile input loaded successfully. -  bool ProfileIsValid; +  bool ProfileIsValid = false; + +  /// \brief Flag indicating if the pass is invoked in ThinLTO compile phase. +  /// +  /// In this phase, in annotation, we should not promote indirect calls. +  /// Instead, we will mark GUIDs that needs to be annotated to the function. +  bool IsThinLTOPreLink;    /// \brief Total number of samples collected in this profile.    ///    /// This is the sum of all the samples collected in all the functions executed    /// at runtime. -  uint64_t TotalCollectedSamples; +  uint64_t TotalCollectedSamples = 0; + +  /// \brief Optimization Remark Emitter used to emit diagnostic remarks. +  OptimizationRemarkEmitter *ORE = nullptr;  };  class SampleProfileLoaderLegacyPass : public ModulePass { @@ -256,8 +300,15 @@ public:    // Class identification, replacement for typeinfo    static char ID; -  SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile) -      : ModulePass(ID), SampleLoader(Name) { +  SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile, +                                bool IsThinLTOPreLink = false) +      : ModulePass(ID), SampleLoader(Name, IsThinLTOPreLink, +                                     [&](Function &F) -> AssumptionCache & { +                                       return ACT->getAssumptionCache(F); +                                     }, +                                     [&](Function &F) -> TargetTransformInfo & { +                                       return TTIWP->getTTI(F); +                                     }) {      initializeSampleProfileLoaderLegacyPassPass(          *PassRegistry::getPassRegistry());    } @@ -267,17 +318,23 @@ public:    bool doInitialization(Module &M) override {      return SampleLoader.doInitialization(M);    } +    StringRef getPassName() const override { return "Sample profile pass"; }    bool runOnModule(Module &M) override;    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<AssumptionCacheTracker>(); +    AU.addRequired<TargetTransformInfoWrapperPass>();    }  private:    SampleProfileLoader SampleLoader; +  AssumptionCacheTracker *ACT = nullptr; +  TargetTransformInfoWrapperPass *TTIWP = nullptr;  }; +} // end anonymous namespace +  /// Return true if the given callsite is hot wrt to its caller.  ///  /// Functions that were inlined in the original binary will be represented @@ -292,8 +349,8 @@ private:  ///  /// If that fraction is larger than the default given by  /// SampleProfileHotThreshold, the callsite will be inlined again. -bool callsiteIsHot(const FunctionSamples *CallerFS, -                   const FunctionSamples *CallsiteFS) { +static bool callsiteIsHot(const FunctionSamples *CallerFS, +                          const FunctionSamples *CallsiteFS) {    if (!CallsiteFS)      return false; // The callsite was not inlined in the original binary. @@ -309,7 +366,6 @@ bool callsiteIsHot(const FunctionSamples *CallerFS,        (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;    return PercentSamples >= SampleProfileHotThreshold;  } -}  /// Mark as used the sample record for the given function samples at  /// (LineOffset, Discriminator). @@ -423,6 +479,7 @@ unsigned SampleProfileLoader::getOffset(const DILocation *DIL) const {           0xffff;  } +#ifndef NDEBUG  /// \brief Print the weight of edge \p E on stream \p OS.  ///  /// \param OS  Stream to emit the output to. @@ -453,6 +510,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,    uint64_t W = (I == BlockWeights.end() ? 0 : I->second);    OS << "weight[" << BB->getName() << "]: " << W << "\n";  } +#endif  /// \brief Get the weight for an instruction.  /// @@ -480,10 +538,12 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {    if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))      return std::error_code(); -  // If a call/invoke instruction is inlined in profile, but not inlined here, +  // If a direct call/invoke instruction is inlined in profile +  // (findCalleeFunctionSamples returns non-empty result), but not inlined here,    // it means that the inlined callsite has no sample, thus the call    // instruction should have 0 count.    if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) && +      !ImmutableCallSite(&Inst).isIndirectCall() &&        findCalleeFunctionSamples(Inst))      return 0; @@ -495,13 +555,18 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {      bool FirstMark =          CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());      if (FirstMark) { -      const Function *F = Inst.getParent()->getParent(); -      LLVMContext &Ctx = F->getContext(); -      emitOptimizationRemark( -          Ctx, DEBUG_TYPE, *F, DLoc, -          Twine("Applied ") + Twine(*R) + -              " samples from profile (offset: " + Twine(LineOffset) + -              ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")"); +      ORE->emit([&]() { +        OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst); +        Remark << "Applied " << ore::NV("NumSamples", *R); +        Remark << " samples from profile (offset: "; +        Remark << ore::NV("LineOffset", LineOffset); +        if (Discriminator) { +          Remark << "."; +          Remark << ore::NV("Discriminator", Discriminator); +        } +        Remark << ")"; +        return Remark; +      });      }      DEBUG(dbgs() << "    " << DLoc.getLine() << "."                   << DIL->getBaseDiscriminator() << ":" << Inst @@ -588,10 +653,11 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {  }  /// Returns a vector of FunctionSamples that are the indirect call targets -/// of \p Inst. The vector is sorted by the total number of samples. +/// of \p Inst. The vector is sorted by the total number of samples. Stores +/// the total call count of the indirect call in \p Sum.  std::vector<const FunctionSamples *>  SampleProfileLoader::findIndirectCallFunctionSamples( -    const Instruction &Inst) const { +    const Instruction &Inst, uint64_t &Sum) const {    const DILocation *DIL = Inst.getDebugLoc();    std::vector<const FunctionSamples *> R; @@ -603,16 +669,25 @@ SampleProfileLoader::findIndirectCallFunctionSamples(    if (FS == nullptr)      return R; +  uint32_t LineOffset = getOffset(DIL); +  uint32_t Discriminator = DIL->getBaseDiscriminator(); + +  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator); +  Sum = 0; +  if (T) +    for (const auto &T_C : T.get()) +      Sum += T_C.second;    if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(            LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) { -    if (M->size() == 0) +    if (M->empty())        return R;      for (const auto &NameFS : *M) { +      Sum += NameFS.second.getEntrySamples();        R.push_back(&NameFS.second);      }      std::sort(R.begin(), R.end(),                [](const FunctionSamples *L, const FunctionSamples *R) { -                return L->getTotalSamples() > R->getTotalSamples(); +                return L->getEntrySamples() > R->getEntrySamples();                });    }    return R; @@ -650,6 +725,39 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {    return FS;  } +bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { +  assert(isa<CallInst>(I) || isa<InvokeInst>(I)); +  CallSite CS(I); +  Function *CalledFunction = CS.getCalledFunction(); +  assert(CalledFunction); +  DebugLoc DLoc = I->getDebugLoc(); +  BasicBlock *BB = I->getParent(); +  InlineParams Params = getInlineParams(); +  Params.ComputeFullInlineCost = true; +  // Checks if there is anything in the reachable portion of the callee at +  // this callsite that makes this inlining potentially illegal. Need to +  // set ComputeFullInlineCost, otherwise getInlineCost may return early +  // when cost exceeds threshold without checking all IRs in the callee. +  // The acutal cost does not matter because we only checks isNever() to +  // see if it is legal to inline the callsite. +  InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC, +                                  None, nullptr, nullptr); +  if (Cost.isNever()) { +    ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB) +              << "incompatible inlining"); +    return false; +  } +  InlineFunctionInfo IFI(nullptr, &GetAC); +  if (InlineFunction(CS, IFI)) { +    // The call to InlineFunction erases I, so we can't pass it here. +    ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB) +              << "inlined hot callee '" << ore::NV("Callee", CalledFunction) +              << "' into '" << ore::NV("Caller", BB->getParent()) << "'"); +    return true; +  } +  return false; +} +  /// \brief Iteratively inline hot callsites of a function.  ///  /// Iteratively traverse all callsites of the function \p F, and find if @@ -659,17 +767,14 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {  /// it to direct call. Each indirect call is limited with a single target.  ///  /// \param F function to perform iterative inlining. -/// \param ImportGUIDs a set to be updated to include all GUIDs that come -///     from a different module but inlined in the profiled binary. +/// \param InlinedGUIDs a set to be updated to include all GUIDs that are +///     inlined in the profiled binary.  ///  /// \returns True if there is any inline happened.  bool SampleProfileLoader::inlineHotFunctions( -    Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) { +    Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {    DenseSet<Instruction *> PromotedInsns;    bool Changed = false; -  LLVMContext &Ctx = F.getContext(); -  std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&]( -      Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); };    while (true) {      bool LocalChanged = false;      SmallVector<Instruction *, 10> CIS; @@ -690,57 +795,59 @@ bool SampleProfileLoader::inlineHotFunctions(        }      }      for (auto I : CIS) { -      InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);        Function *CalledFunction = CallSite(I).getCalledFunction();        // Do not inline recursive calls.        if (CalledFunction == &F)          continue; -      Instruction *DI = I; -      if (!CalledFunction && !PromotedInsns.count(I) && -          CallSite(I).isIndirectCall()) -        for (const auto *FS : findIndirectCallFunctionSamples(*I)) { +      if (CallSite(I).isIndirectCall()) { +        if (PromotedInsns.count(I)) +          continue; +        uint64_t Sum; +        for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { +          if (IsThinLTOPreLink) { +            FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), +                                     Samples->getTotalSamples() * +                                         SampleProfileHotThreshold / 100); +            continue; +          }            auto CalleeFunctionName = FS->getName();            // If it is a recursive call, we do not inline it as it could bloat            // the code exponentially. There is way to better handle this, e.g.            // clone the caller first, and inline the cloned caller if it is -          // recursive. As llvm does not inline recursive calls, we will simply -          // ignore it instead of handling it explicitly. +          // recursive. As llvm does not inline recursive calls, we will +          // simply ignore it instead of handling it explicitly.            if (CalleeFunctionName == F.getName())              continue; +            const char *Reason = "Callee function not available";            auto R = SymbolMap.find(CalleeFunctionName); -          if (R == SymbolMap.end()) -            continue; -          CalledFunction = R->getValue(); -          if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { -            // The indirect target was promoted and inlined in the profile, as a -            // result, we do not have profile info for the branch probability. -            // We set the probability to 80% taken to indicate that the static -            // call is likely taken. -            DI = dyn_cast<Instruction>( -                promoteIndirectCall(I, CalledFunction, 80, 100, false) -                    ->stripPointerCasts()); +          if (R != SymbolMap.end() && R->getValue() && +              !R->getValue()->isDeclaration() && +              R->getValue()->getSubprogram() && +              isLegalToPromote(CallSite(I), R->getValue(), &Reason)) { +            uint64_t C = FS->getEntrySamples(); +            Instruction *DI = +                pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE); +            Sum -= C;              PromotedInsns.insert(I); +            // If profile mismatches, we should not attempt to inline DI. +            if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) && +                inlineCallInstruction(DI)) +              LocalChanged = true;            } else { -            DEBUG(dbgs() << "\nFailed to promote indirect call to " -                         << CalleeFunctionName << " because " << Reason -                         << "\n"); -            continue; +            DEBUG(dbgs() +                  << "\nFailed to promote indirect call to " +                  << CalleeFunctionName << " because " << Reason << "\n");            }          } -      if (!CalledFunction || !CalledFunction->getSubprogram()) { -        findCalleeFunctionSamples(*I)->findImportedFunctions( -            ImportGUIDs, F.getParent(), +      } else if (CalledFunction && CalledFunction->getSubprogram() && +                 !CalledFunction->isDeclaration()) { +        if (inlineCallInstruction(I)) +          LocalChanged = true; +      } else if (IsThinLTOPreLink) { +        findCalleeFunctionSamples(*I)->findInlinedFunctions( +            InlinedGUIDs, F.getParent(),              Samples->getTotalSamples() * SampleProfileHotThreshold / 100); -        continue; -      } -      DebugLoc DLoc = I->getDebugLoc(); -      if (InlineFunction(CallSite(DI), IFI)) { -        LocalChanged = true; -        emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc, -                               Twine("inlined hot callee '") + -                                   CalledFunction->getName() + "' into '" + -                                   F.getName() + "'");        }      }      if (LocalChanged) { @@ -1076,24 +1183,20 @@ void SampleProfileLoader::buildEdges(Function &F) {    }  } -/// Sorts the CallTargetMap \p M by count in descending order and stores the -/// sorted result in \p Sorted. Returns the total counts. -static uint64_t SortCallTargets(SmallVector<InstrProfValueData, 2> &Sorted, -                                const SampleRecord::CallTargetMap &M) { -  Sorted.clear(); -  uint64_t Sum = 0; -  for (auto I = M.begin(); I != M.end(); ++I) { -    Sum += I->getValue(); -    Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()}); -  } -  std::sort(Sorted.begin(), Sorted.end(), +/// Returns the sorted CallTargetMap \p M by count in descending order. +static SmallVector<InstrProfValueData, 2> SortCallTargets( +    const SampleRecord::CallTargetMap &M) { +  SmallVector<InstrProfValueData, 2> R; +  for (auto I = M.begin(); I != M.end(); ++I) +    R.push_back({Function::getGUID(I->getKey()), I->getValue()}); +  std::sort(R.begin(), R.end(),              [](const InstrProfValueData &L, const InstrProfValueData &R) {                if (L.Count == R.Count)                  return L.Value > R.Value;                else                  return L.Count > R.Count;              }); -  return Sum; +  return R;  }  /// \brief Propagate weights into edges @@ -1184,10 +1287,12 @@ void SampleProfileLoader::propagateWeights(Function &F) {            if (!FS)              continue;            auto T = FS->findCallTargetMapAt(LineOffset, Discriminator); -          if (!T || T.get().size() == 0) +          if (!T || T.get().empty())              continue; -          SmallVector<InstrProfValueData, 2> SortedCallTargets; -          uint64_t Sum = SortCallTargets(SortedCallTargets, T.get()); +          SmallVector<InstrProfValueData, 2> SortedCallTargets = +              SortCallTargets(T.get()); +          uint64_t Sum; +          findIndirectCallFunctionSamples(I, Sum);            annotateValueSite(*I.getParent()->getParent()->getParent(), I,                              SortedCallTargets, Sum, IPVK_IndirectCallTarget,                              SortedCallTargets.size()); @@ -1211,7 +1316,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {                   << ".\n");      SmallVector<uint32_t, 4> Weights;      uint32_t MaxWeight = 0; -    DebugLoc MaxDestLoc; +    Instruction *MaxDestInst;      for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {        BasicBlock *Succ = TI->getSuccessor(I);        Edge E = std::make_pair(BB, Succ); @@ -1230,7 +1335,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {        if (Weight != 0) {          if (Weight > MaxWeight) {            MaxWeight = Weight; -          MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc(); +          MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();          }        }      } @@ -1243,15 +1348,13 @@ void SampleProfileLoader::propagateWeights(Function &F) {      // weights, the second pass does not need to set it.      if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {        DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); -      TI->setMetadata(llvm::LLVMContext::MD_prof, +      TI->setMetadata(LLVMContext::MD_prof,                        MDB.createBranchWeights(Weights)); -      emitOptimizationRemark( -          Ctx, DEBUG_TYPE, F, MaxDestLoc, -          Twine("most popular destination for conditional branches at ") + -              ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" + -                                   Twine(BranchLoc.getLine()) + ":" + -                                   Twine(BranchLoc.getCol())) -                           : Twine("<UNKNOWN LOCATION>"))); +      ORE->emit([&]() { +        return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst) +               << "most popular destination for conditional branches at " +               << ore::NV("CondBranchesLoc", BranchLoc); +      });      } else {        DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");      } @@ -1351,18 +1454,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {    DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()                 << ": " << getFunctionLoc(F) << "\n"); -  DenseSet<GlobalValue::GUID> ImportGUIDs; -  Changed |= inlineHotFunctions(F, ImportGUIDs); +  DenseSet<GlobalValue::GUID> InlinedGUIDs; +  Changed |= inlineHotFunctions(F, InlinedGUIDs);    // Compute basic block weights.    Changed |= computeBlockWeights(F);    if (Changed) {      // Add an entry count to the function using the samples gathered at the -    // function entry. Also sets the GUIDs that comes from a different -    // module but inlined in the profiled binary. This is aiming at making -    // the IR match the profiled binary before annotation. -    F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs); +    // function entry. +    // Sets the GUIDs that are inlined in the profiled binary. This is used +    // for ThinLink to make correct liveness analysis, and also make the IR +    // match the profiled binary before annotation. +    F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs);      // Compute dominance and loop info needed for propagation.      computeDominanceAndLoopInfo(F); @@ -1404,9 +1508,11 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {  }  char SampleProfileLoaderLegacyPass::ID = 0; +  INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",                        "Sample Profile loader", false, false)  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)  INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",                      "Sample Profile loader", false, false) @@ -1431,7 +1537,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {    return new SampleProfileLoaderLegacyPass(Name);  } -bool SampleProfileLoader::runOnModule(Module &M) { +bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {    if (!ProfileIsValid)      return false; @@ -1463,7 +1569,7 @@ bool SampleProfileLoader::runOnModule(Module &M) {    for (auto &F : M)      if (!F.isDeclaration()) {        clearFunctionData(); -      retval |= runOnFunction(F); +      retval |= runOnFunction(F, AM);      }    if (M.getProfileSummary() == nullptr)      M.setProfileSummary(Reader->getSummary().getMD(M.getContext())); @@ -1471,13 +1577,23 @@ bool SampleProfileLoader::runOnModule(Module &M) {  }  bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { -  // FIXME: pass in AssumptionCache correctly for the new pass manager. -  SampleLoader.setACT(&getAnalysis<AssumptionCacheTracker>()); -  return SampleLoader.runOnModule(M); +  ACT = &getAnalysis<AssumptionCacheTracker>(); +  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); +  return SampleLoader.runOnModule(M, nullptr);  } -bool SampleProfileLoader::runOnFunction(Function &F) { +bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {    F.setEntryCount(0); +  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; +  if (AM) { +    auto &FAM = +        AM->getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent()) +            .getManager(); +    ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); +  } else { +    OwnedORE = make_unique<OptimizationRemarkEmitter>(&F); +    ORE = OwnedORE.get(); +  }    Samples = Reader->getSamplesFor(F);    if (Samples && !Samples->empty())      return emitAnnotations(F); @@ -1486,13 +1602,23 @@ bool SampleProfileLoader::runOnFunction(Function &F) {  PreservedAnalyses SampleProfileLoaderPass::run(Module &M,                                                 ModuleAnalysisManager &AM) { +  FunctionAnalysisManager &FAM = +      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + +  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { +    return FAM.getResult<AssumptionAnalysis>(F); +  }; +  auto GetTTI = [&](Function &F) -> TargetTransformInfo & { +    return FAM.getResult<TargetIRAnalysis>(F); +  };    SampleProfileLoader SampleLoader( -      ProfileFileName.empty() ? SampleProfileFile : ProfileFileName); +      ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, +      IsThinLTOPreLink, GetAssumptionCache, GetTTI);    SampleLoader.doInitialization(M); -  if (!SampleLoader.runOnModule(M)) +  if (!SampleLoader.runOnModule(M, &AM))      return PreservedAnalyses::all();    return PreservedAnalyses::none(); | 
