diff options
Diffstat (limited to 'lib/Transforms/IPO/PartialInlining.cpp')
| -rw-r--r-- | lib/Transforms/IPO/PartialInlining.cpp | 785 | 
1 files changed, 661 insertions, 124 deletions
| diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 8840435af642..683655f1f68b 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -13,45 +13,126 @@  //===----------------------------------------------------------------------===//  #include "llvm/Transforms/IPO/PartialInlining.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/BlockFrequencyInfo.h"  #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/CodeMetrics.h"  #include "llvm/Analysis/InlineCost.h"  #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h"  #include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DebugLoc.h"  #include "llvm/IR/DiagnosticInfo.h"  #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h"  #include "llvm/IR/Module.h" +#include "llvm/IR/User.h"  #include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/Transforms/IPO.h"  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <iterator> +#include <memory> +#include <tuple> +#include <vector> +  using namespace llvm;  #define DEBUG_TYPE "partial-inlining"  STATISTIC(NumPartialInlined,            "Number of callsites functions partially inlined into."); +STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with " +                                        "cold outlined regions were partially " +                                        "inlined into its caller(s)."); +STATISTIC(NumColdRegionsFound, +           "Number of cold single entry/exit regions found."); +STATISTIC(NumColdRegionsOutlined, +           "Number of cold single entry/exit regions outlined.");  // Command line option to disable partial-inlining. The default is false:  static cl::opt<bool>      DisablePartialInlining("disable-partial-inlining", cl::init(false), -                           cl::Hidden, cl::desc("Disable partial ininling")); +                           cl::Hidden, cl::desc("Disable partial inlining")); +// Command line option to disable multi-region partial-inlining. The default is +// false: +static cl::opt<bool> DisableMultiRegionPartialInline( +    "disable-mr-partial-inlining", cl::init(false), cl::Hidden, +    cl::desc("Disable multi-region partial inlining")); + +// Command line option to force outlining in regions with live exit variables. +// The default is false: +static cl::opt<bool> +    ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, +               cl::desc("Force outline regions with live exits")); + +// Command line option to enable marking outline functions with Cold Calling +// Convention. The default is false: +static cl::opt<bool> +    MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, +                       cl::desc("Mark outline function calls with ColdCC")); + +#ifndef NDEBUG +// Command line option to debug partial-inlining. The default is none: +static cl::opt<bool> TracePartialInlining("trace-partial-inlining", +                                          cl::init(false), cl::Hidden, +                                          cl::desc("Trace partial inlining.")); +#endif +  // This is an option used by testing:  static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",                                        cl::init(false), cl::ZeroOrMore,                                        cl::ReallyHidden,                                        cl::desc("Skip Cost Analysis")); +// Used to determine if a cold region is worth outlining based on +// its inlining cost compared to the original function.  Default is set at 10%. +// ie. if the cold region reduces the inlining cost of the original function by +// at least 10%. +static cl::opt<float> MinRegionSizeRatio( +    "min-region-size-ratio", cl::init(0.1), cl::Hidden, +    cl::desc("Minimum ratio comparing relative sizes of each " +             "outline candidate and original function")); +// Used to tune the minimum number of execution counts needed in the predecessor +// block to the cold edge. ie. confidence interval. +static cl::opt<unsigned> +    MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, +                             cl::desc("Minimum block executions to consider " +                                      "its BranchProbabilityInfo valid")); +// Used to determine when an edge is considered cold. Default is set to 10%. ie. +// if the branch probability is 10% or less, then it is deemed as 'cold'. +static cl::opt<float> ColdBranchRatio( +    "cold-branch-ratio", cl::init(0.1), cl::Hidden, +    cl::desc("Minimum BranchProbability to consider a region cold."));  static cl::opt<unsigned> MaxNumInlineBlocks(      "max-num-inline-blocks", cl::init(5), cl::Hidden, -    cl::desc("Max Number of Blocks  To be Partially Inlined")); +    cl::desc("Max number of blocks to be partially inlined"));  // Command line option to set the maximum number of partial inlining allowed  // for the module. The default value of -1 means no limit. @@ -75,9 +156,8 @@ static cl::opt<unsigned> ExtraOutliningPenalty(  namespace {  struct FunctionOutliningInfo { -  FunctionOutliningInfo() -      : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr), -        ReturnBlockPreds() {} +  FunctionOutliningInfo() = default; +    // Returns the number of blocks to be inlined including all blocks    // in Entries and one return block.    unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; } @@ -85,30 +165,69 @@ struct FunctionOutliningInfo {    // A set of blocks including the function entry that guard    // the region to be outlined.    SmallVector<BasicBlock *, 4> Entries; +    // The return block that is not included in the outlined region. -  BasicBlock *ReturnBlock; +  BasicBlock *ReturnBlock = nullptr; +    // The dominating block of the region to be outlined. -  BasicBlock *NonReturnBlock; +  BasicBlock *NonReturnBlock = nullptr; +    // The set of blocks in Entries that that are predecessors to ReturnBlock    SmallVector<BasicBlock *, 4> ReturnBlockPreds;  }; +struct FunctionOutliningMultiRegionInfo { +  FunctionOutliningMultiRegionInfo() +      : ORI() {} + +  // Container for outline regions +  struct OutlineRegionInfo { +    OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region, +                      BasicBlock *EntryBlock, BasicBlock *ExitBlock, +                      BasicBlock *ReturnBlock) +        : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock), +          ReturnBlock(ReturnBlock) {} +    SmallVector<BasicBlock *, 8> Region; +    BasicBlock *EntryBlock; +    BasicBlock *ExitBlock; +    BasicBlock *ReturnBlock; +  }; + +  SmallVector<OutlineRegionInfo, 4> ORI; +}; +  struct PartialInlinerImpl { +    PartialInlinerImpl(        std::function<AssumptionCache &(Function &)> *GetAC,        std::function<TargetTransformInfo &(Function &)> *GTTI,        Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI, -      ProfileSummaryInfo *ProfSI) -      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} +      ProfileSummaryInfo *ProfSI, +      std::function<OptimizationRemarkEmitter &(Function &)> *GORE) +      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI), +        GetORE(GORE) {} +    bool run(Module &M); -  Function *unswitchFunction(Function *F); +  // Main part of the transformation that calls helper functions to find +  // outlining candidates, clone & outline the function, and attempt to +  // partially inline the resulting function. Returns true if +  // inlining was successful, false otherwise.  Also returns the outline +  // function (only if we partially inlined early returns) as there is a +  // possibility to further "peel" early return statements that were left in the +  // outline function due to code size. +  std::pair<bool, Function *> unswitchFunction(Function *F);    // This class speculatively clones the the function to be partial inlined.    // At the end of partial inlining, the remaining callsites to the cloned    // function that are not partially inlined will be fixed up to reference    // the original function, and the cloned function will be erased.    struct FunctionCloner { -    FunctionCloner(Function *F, FunctionOutliningInfo *OI); +    // Two constructors, one for single region outlining, the other for +    // multi-region outlining. +    FunctionCloner(Function *F, FunctionOutliningInfo *OI, +                   OptimizationRemarkEmitter &ORE); +    FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI, +                   OptimizationRemarkEmitter &ORE);      ~FunctionCloner();      // Prepare for function outlining: making sure there is only @@ -116,20 +235,34 @@ struct PartialInlinerImpl {      // the return block.      void NormalizeReturnBlock(); -    // Do function outlining: -    Function *doFunctionOutlining(); +    // Do function outlining for cold regions. +    bool doMultiRegionFunctionOutlining(); +    // Do function outlining for region after early return block(s). +    // NOTE: For vararg functions that do the vararg handling in the outlined +    //       function, we temporarily generate IR that does not properly +    //       forward varargs to the outlined function. Calling InlineFunction +    //       will update calls to the outlined functions to properly forward +    //       the varargs. +    Function *doSingleRegionFunctionOutlining();      Function *OrigFunc = nullptr;      Function *ClonedFunc = nullptr; -    Function *OutlinedFunc = nullptr; -    BasicBlock *OutliningCallBB = nullptr; + +    typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair; +    // Keep track of Outlined Functions and the basic block they're called from. +    SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions; +      // ClonedFunc is inlined in one of its callers after function      // outlining.      bool IsFunctionInlined = false;      // The cost of the region to be outlined.      int OutlinedRegionCost = 0; +    // ClonedOI is specific to outlining non-early return blocks.      std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr; +    // ClonedOMRI is specific to outlining cold regions. +    std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;      std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr; +    OptimizationRemarkEmitter &ORE;    };  private: @@ -138,6 +271,7 @@ private:    std::function<TargetTransformInfo &(Function &)> *GetTTI;    Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;    ProfileSummaryInfo *PSI; +  std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;    // Return the frequency of the OutlininingBB relative to F's entry point.    // The result is no larger than 1 and is represented using BP. @@ -148,8 +282,7 @@ private:    // Return true if the callee of CS should be partially inlined with    // profit.    bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, -                           BlockFrequency WeightedOutliningRcost, -                           OptimizationRemarkEmitter &ORE); +                           BlockFrequency WeightedOutliningRcost);    // Try to inline DuplicateFunction (cloned from F with call to    // the OutlinedFunction into its callers. Return true @@ -196,17 +329,20 @@ private:    // - The second value is the estimated size of the new call sequence in    //   basic block Cloner.OutliningCallBB;    std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner); +    // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to    // approximate both the size and runtime cost (Note that in the current    // inline cost analysis, there is no clear distinction there either).    static int computeBBInlineCost(BasicBlock *BB);    std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); - +  std::unique_ptr<FunctionOutliningMultiRegionInfo> +  computeOutliningColdRegionsInfo(Function *F);  };  struct PartialInlinerLegacyPass : public ModulePass {    static char ID; // Pass identification, replacement for typeid +    PartialInlinerLegacyPass() : ModulePass(ID) {      initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());    } @@ -216,6 +352,7 @@ struct PartialInlinerLegacyPass : public ModulePass {      AU.addRequired<ProfileSummaryInfoWrapperPass>();      AU.addRequired<TargetTransformInfoWrapperPass>();    } +    bool runOnModule(Module &M) override {      if (skipModule(M))        return false; @@ -225,6 +362,7 @@ struct PartialInlinerLegacyPass : public ModulePass {          &getAnalysis<TargetTransformInfoWrapperPass>();      ProfileSummaryInfo *PSI =          getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); +    std::unique_ptr<OptimizationRemarkEmitter> UPORE;      std::function<AssumptionCache &(Function &)> GetAssumptionCache =          [&ACT](Function &F) -> AssumptionCache & { @@ -236,9 +374,185 @@ struct PartialInlinerLegacyPass : public ModulePass {        return TTIWP->getTTI(F);      }; -    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M); +    std::function<OptimizationRemarkEmitter &(Function &)> GetORE = +        [&UPORE](Function &F) -> OptimizationRemarkEmitter & { +      UPORE.reset(new OptimizationRemarkEmitter(&F)); +      return *UPORE.get(); +    }; + +    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI, +                              &GetORE) +        .run(M);    }  }; + +} // end anonymous namespace + +std::unique_ptr<FunctionOutliningMultiRegionInfo> +PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) { +  BasicBlock *EntryBlock = &F->front(); + +  DominatorTree DT(*F); +  LoopInfo LI(DT); +  BranchProbabilityInfo BPI(*F, LI); +  std::unique_ptr<BlockFrequencyInfo> ScopedBFI; +  BlockFrequencyInfo *BFI; +  if (!GetBFI) { +    ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI)); +    BFI = ScopedBFI.get(); +  } else +    BFI = &(*GetBFI)(*F); + +  auto &ORE = (*GetORE)(*F); + +  // Return if we don't have profiling information. +  if (!PSI->hasInstrumentationProfile()) +    return std::unique_ptr<FunctionOutliningMultiRegionInfo>(); + +  std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo = +      llvm::make_unique<FunctionOutliningMultiRegionInfo>(); + +  auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) { +    BasicBlock *Dom = BlockList.front(); +    return BlockList.size() > 1 && +           std::distance(pred_begin(Dom), pred_end(Dom)) == 1; +  }; + +  auto IsSingleExit = +      [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * { +    BasicBlock *ExitBlock = nullptr; +    for (auto *Block : BlockList) { +      for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) { +        if (!is_contained(BlockList, *SI)) { +          if (ExitBlock) { +            ORE.emit([&]() { +              return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion", +                                              &SI->front()) +                     << "Region dominated by " +                     << ore::NV("Block", BlockList.front()->getName()) +                     << " has more than one region exit edge."; +            }); +            return nullptr; +          } else +            ExitBlock = Block; +        } +      } +    } +    return ExitBlock; +  }; + +  auto BBProfileCount = [BFI](BasicBlock *BB) { +    return BFI->getBlockProfileCount(BB) +               ? BFI->getBlockProfileCount(BB).getValue() +               : 0; +  }; + +  // Use the same computeBBInlineCost function to compute the cost savings of +  // the outlining the candidate region. +  int OverallFunctionCost = 0; +  for (auto &BB : *F) +    OverallFunctionCost += computeBBInlineCost(&BB); + +#ifndef NDEBUG +  if (TracePartialInlining) +    dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n"; +#endif +  int MinOutlineRegionCost = +      static_cast<int>(OverallFunctionCost * MinRegionSizeRatio); +  BranchProbability MinBranchProbability( +      static_cast<int>(ColdBranchRatio * MinBlockCounterExecution), +      MinBlockCounterExecution); +  bool ColdCandidateFound = false; +  BasicBlock *CurrEntry = EntryBlock; +  std::vector<BasicBlock *> DFS; +  DenseMap<BasicBlock *, bool> VisitedMap; +  DFS.push_back(CurrEntry); +  VisitedMap[CurrEntry] = true; +  // Use Depth First Search on the basic blocks to find CFG edges that are +  // considered cold. +  // Cold regions considered must also have its inline cost compared to the +  // overall inline cost of the original function.  The region is outlined only +  // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or +  // more. +  while (!DFS.empty()) { +    auto *thisBB = DFS.back(); +    DFS.pop_back(); +    // Only consider regions with predecessor blocks that are considered +    // not-cold (default: part of the top 99.99% of all block counters) +    // AND greater than our minimum block execution count (default: 100). +    if (PSI->isColdBB(thisBB, BFI) || +        BBProfileCount(thisBB) < MinBlockCounterExecution) +      continue; +    for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) { +      if (VisitedMap[*SI]) +        continue; +      VisitedMap[*SI] = true; +      DFS.push_back(*SI); +      // If branch isn't cold, we skip to the next one. +      BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI); +      if (SuccProb > MinBranchProbability) +        continue; +#ifndef NDEBUG +      if (TracePartialInlining) { +        dbgs() << "Found cold edge: " << thisBB->getName() << "->" +               << (*SI)->getName() << "\nBranch Probability = " << SuccProb +               << "\n"; +      } +#endif +      SmallVector<BasicBlock *, 8> DominateVector; +      DT.getDescendants(*SI, DominateVector); +      // We can only outline single entry regions (for now). +      if (!IsSingleEntry(DominateVector)) +        continue; +      BasicBlock *ExitBlock = nullptr; +      // We can only outline single exit regions (for now). +      if (!(ExitBlock = IsSingleExit(DominateVector))) +        continue; +      int OutlineRegionCost = 0; +      for (auto *BB : DominateVector) +        OutlineRegionCost += computeBBInlineCost(BB); + +#ifndef NDEBUG +      if (TracePartialInlining) +        dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n"; +#endif + +      if (OutlineRegionCost < MinOutlineRegionCost) { +        ORE.emit([&]() { +          return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", +                                            &SI->front()) +                 << ore::NV("Callee", F) << " inline cost-savings smaller than " +                 << ore::NV("Cost", MinOutlineRegionCost); +        }); +        continue; +      } +      // For now, ignore blocks that belong to a SISE region that is a +      // candidate for outlining.  In the future, we may want to look +      // at inner regions because the outer region may have live-exit +      // variables. +      for (auto *BB : DominateVector) +        VisitedMap[BB] = true; +      // ReturnBlock here means the block after the outline call +      BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor(); +      // assert(ReturnBlock && "ReturnBlock is NULL somehow!"); +      FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo( +          DominateVector, DominateVector.front(), ExitBlock, ReturnBlock); +      RegInfo.Region = DominateVector; +      OutliningInfo->ORI.push_back(RegInfo); +#ifndef NDEBUG +      if (TracePartialInlining) { +        dbgs() << "Found Cold Candidate starting at block: " +               << DominateVector.front()->getName() << "\n"; +      } +#endif +      ColdCandidateFound = true; +      NumColdRegionsFound++; +    } +  } +  if (ColdCandidateFound) +    return OutliningInfo; +  else +    return std::unique_ptr<FunctionOutliningMultiRegionInfo>();  }  std::unique_ptr<FunctionOutliningInfo> @@ -319,7 +633,6 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {      OutliningInfo->Entries.push_back(CurrEntry);      CurrEntry = OtherSucc; -    } while (true);    if (!CandidateFound) @@ -413,15 +726,19 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {  BranchProbability  PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { - +  BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;    auto EntryFreq =        Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());    auto OutliningCallFreq = -      Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB); - -  auto OutlineRegionRelFreq = -      BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(), -                                              EntryFreq.getFrequency()); +      Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB); +  // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE +  // we outlined any regions, so we may encounter situations where the +  // OutliningCallFreq is *slightly* bigger than the EntryFreq. +  if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) { +    OutliningCallFreq = EntryFreq; +  } +  auto OutlineRegionRelFreq = BranchProbability::getBranchProbability( +      OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());    if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))      return OutlineRegionRelFreq; @@ -448,10 +765,10 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {  }  bool PartialInlinerImpl::shouldPartialInline( -    CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, -    OptimizationRemarkEmitter &ORE) { - +    CallSite CS, FunctionCloner &Cloner, +    BlockFrequency WeightedOutliningRcost) {    using namespace ore; +    if (SkipCostAnalysis)      return true; @@ -461,30 +778,37 @@ bool PartialInlinerImpl::shouldPartialInline(    Function *Caller = CS.getCaller();    auto &CalleeTTI = (*GetTTI)(*Callee); +  auto &ORE = (*GetORE)(*Caller);    InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, -                                *GetAssumptionCache, GetBFI, PSI); +                                *GetAssumptionCache, GetBFI, PSI, &ORE);    if (IC.isAlways()) { -    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) +    ORE.emit([&]() { +      return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)               << NV("Callee", Cloner.OrigFunc) -             << " should always be fully inlined, not partially"); +             << " should always be fully inlined, not partially"; +    });      return false;    }    if (IC.isNever()) { -    ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) +    ORE.emit([&]() { +      return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)               << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "               << NV("Caller", Caller) -             << " because it should never be inlined (cost=never)"); +             << " because it should never be inlined (cost=never)"; +    });      return false;    }    if (!IC) { -    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) +    ORE.emit([&]() { +      return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)               << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "               << NV("Caller", Caller) << " because too costly to inline (cost="               << NV("Cost", IC.getCost()) << ", threshold=" -             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); +             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"; +    });      return false;    }    const DataLayout &DL = Caller->getParent()->getDataLayout(); @@ -495,23 +819,28 @@ bool PartialInlinerImpl::shouldPartialInline(    // Weighted saving is smaller than weighted cost, return false    if (NormWeightedSavings < WeightedOutliningRcost) { -    ORE.emit( -        OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call) -        << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " -        << NV("Caller", Caller) << " runtime overhead (overhead=" -        << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) -        << ", savings=" -        << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")" -        << " of making the outlined call is too high"); +    ORE.emit([&]() { +      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", +                                        Call) +             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " +             << NV("Caller", Caller) << " runtime overhead (overhead=" +             << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) +             << ", savings=" +             << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) +             << ")" +             << " of making the outlined call is too high"; +    });      return false;    } -  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) +  ORE.emit([&]() { +    return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)             << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "             << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())             << " (threshold=" -           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); +           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"; +  });    return true;  } @@ -566,23 +895,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {  std::tuple<int, int>  PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) { - -  // Now compute the cost of the call sequence to the outlined function -  // 'OutlinedFunction' in BB 'OutliningCallBB': -  int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB); - -  // Now compute the cost of the extracted/outlined function itself: -  int OutlinedFunctionCost = 0; -  for (BasicBlock &BB : *Cloner.OutlinedFunc) { -    OutlinedFunctionCost += computeBBInlineCost(&BB); +  int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0; +  for (auto FuncBBPair : Cloner.OutlinedFunctions) { +    Function *OutlinedFunc = FuncBBPair.first; +    BasicBlock* OutliningCallBB = FuncBBPair.second; +    // Now compute the cost of the call sequence to the outlined function +    // 'OutlinedFunction' in BB 'OutliningCallBB': +    OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB); + +    // Now compute the cost of the extracted/outlined function itself: +    for (BasicBlock &BB : *OutlinedFunc) +      OutlinedFunctionCost += computeBBInlineCost(&BB);    } -    assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&           "Outlined function cost should be no less than the outlined region"); +    // The code extractor introduces a new root and exit stub blocks with    // additional unconditional branches. Those branches will be eliminated    // later with bb layout. The cost should be adjusted accordingly: -  OutlinedFunctionCost -= 2 * InlineConstants::InstrCost; +  OutlinedFunctionCost -= +      2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();    int OutliningRuntimeOverhead =        OutliningFuncCallCost + @@ -636,9 +968,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(    }  } -PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F, -                                                   FunctionOutliningInfo *OI) -    : OrigFunc(F) { +PartialInlinerImpl::FunctionCloner::FunctionCloner( +    Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE) +    : OrigFunc(F), ORE(ORE) {    ClonedOI = llvm::make_unique<FunctionOutliningInfo>();    // Clone the function, so that we can hack away on it. @@ -659,8 +991,39 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,    F->replaceAllUsesWith(ClonedFunc);  } -void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { +PartialInlinerImpl::FunctionCloner::FunctionCloner( +    Function *F, FunctionOutliningMultiRegionInfo *OI, +    OptimizationRemarkEmitter &ORE) +    : OrigFunc(F), ORE(ORE) { +  ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>(); + +  // Clone the function, so that we can hack away on it. +  ValueToValueMapTy VMap; +  ClonedFunc = CloneFunction(F, VMap); +  // Go through all Outline Candidate Regions and update all BasicBlock +  // information. +  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : +       OI->ORI) { +    SmallVector<BasicBlock *, 8> Region; +    for (BasicBlock *BB : RegionInfo.Region) { +      Region.push_back(cast<BasicBlock>(VMap[BB])); +    } +    BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]); +    BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]); +    BasicBlock *NewReturnBlock = nullptr; +    if (RegionInfo.ReturnBlock) +      NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]); +    FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo( +        Region, NewEntryBlock, NewExitBlock, NewReturnBlock); +    ClonedOMRI->ORI.push_back(MappedRegionInfo); +  } +  // Go ahead and update all uses to the duplicate, so that we can just +  // use the inliner functionality when we're done hacking. +  F->replaceAllUsesWith(ClonedFunc); +} + +void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {    auto getFirstPHI = [](BasicBlock *BB) {      BasicBlock::iterator I = BB->begin();      PHINode *FirstPhi = nullptr; @@ -676,6 +1039,11 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {      return FirstPhi;    }; +  // Shouldn't need to normalize PHIs if we're not outlining non-early return +  // blocks. +  if (!ClonedOI) +    return; +    // Special hackery is needed with PHI nodes that have inputs from more than    // one extracted block.  For simplicity, just split the PHIs into a two-level    // sequence of PHIs, some of which will go in the extracted region, and some @@ -726,16 +1094,90 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {        DeadPhis.push_back(OldPhi);      }      ++I; -    } -    for (auto *DP : DeadPhis) -      DP->eraseFromParent(); +  } +  for (auto *DP : DeadPhis) +    DP->eraseFromParent(); + +  for (auto E : ClonedOI->ReturnBlockPreds) { +    E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); +  } +} + +bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { + +  auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) { +    int Cost = 0; +    for (BasicBlock* BB : Region) +      Cost += computeBBInlineCost(BB); +    return Cost; +  }; + +  assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline"); + +  if (ClonedOMRI->ORI.empty()) +    return false; + +  // The CodeExtractor needs a dominator tree. +  DominatorTree DT; +  DT.recalculate(*ClonedFunc); + +  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. +  LoopInfo LI(DT); +  BranchProbabilityInfo BPI(*ClonedFunc, LI); +  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); -    for (auto E : ClonedOI->ReturnBlockPreds) { -      E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); +  SetVector<Value *> Inputs, Outputs, Sinks; +  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : +       ClonedOMRI->ORI) { +    int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region); + +    CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false, +                     ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false); + +    CE.findInputsOutputs(Inputs, Outputs, Sinks); + +#ifndef NDEBUG +    if (TracePartialInlining) { +      dbgs() << "inputs: " << Inputs.size() << "\n"; +      dbgs() << "outputs: " << Outputs.size() << "\n"; +      for (Value *value : Inputs) +        dbgs() << "value used in func: " << *value << "\n"; +      for (Value *output : Outputs) +        dbgs() << "instr used in func: " << *output << "\n";      } +#endif +    // Do not extract regions that have live exit variables. +    if (Outputs.size() > 0 && !ForceLiveExit) +      continue; + +    Function *OutlinedFunc = CE.extractCodeRegion(); + +    if (OutlinedFunc) { +      CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); +      BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent(); +      assert(OutliningCallBB->getParent() == ClonedFunc); +      OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB)); +      NumColdRegionsOutlined++; +      OutlinedRegionCost += CurrentOutlinedRegionCost; + +      if (MarkOutlinedColdCC) { +        OutlinedFunc->setCallingConv(CallingConv::Cold); +        OCS.setCallingConv(CallingConv::Cold); +      } +    } else +      ORE.emit([&]() { +        return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", +                                        &RegionInfo.Region.front()->front()) +               << "Failed to extract region at block " +               << ore::NV("Block", RegionInfo.Region.front()); +      }); +  } + +  return !OutlinedFunctions.empty();  } -Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() { +Function * +PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {    // Returns true if the block is to be partial inlined into the caller    // (i.e. not to be extracted to the out of line function)    auto ToBeInlined = [&, this](BasicBlock *BB) { @@ -744,6 +1186,16 @@ Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {              ClonedOI->Entries.end());    }; +  assert(ClonedOI && "Expecting OutlineInfo for single region outline"); +  // The CodeExtractor needs a dominator tree. +  DominatorTree DT; +  DT.recalculate(*ClonedFunc); + +  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. +  LoopInfo LI(DT); +  BranchProbabilityInfo BPI(*ClonedFunc, LI); +  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); +    // Gather up the blocks that we're going to extract.    std::vector<BasicBlock *> ToExtract;    ToExtract.push_back(ClonedOI->NonReturnBlock); @@ -759,26 +1211,27 @@ Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {        OutlinedRegionCost += computeBBInlineCost(&BB);      } -  // The CodeExtractor needs a dominator tree. -  DominatorTree DT; -  DT.recalculate(*ClonedFunc); - -  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. -  LoopInfo LI(DT); -  BranchProbabilityInfo BPI(*ClonedFunc, LI); -  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); -    // Extract the body of the if. -  OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, -                               ClonedFuncBFI.get(), &BPI) -                     .extractCodeRegion(); +  Function *OutlinedFunc = +      CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, +                    ClonedFuncBFI.get(), &BPI, +                    /* AllowVarargs */ true) +          .extractCodeRegion();    if (OutlinedFunc) { -    OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) -        .getInstruction() -        ->getParent(); +    BasicBlock *OutliningCallBB = +        PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) +            .getInstruction() +            ->getParent();      assert(OutliningCallBB->getParent() == ClonedFunc); -  } +    OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB)); +  } else +    ORE.emit([&]() { +      return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", +                                      &ToExtract.front()->front()) +             << "Failed to extract region at block " +             << ore::NV("Block", ToExtract.front()); +    });    return OutlinedFunc;  } @@ -789,76 +1242,133 @@ PartialInlinerImpl::FunctionCloner::~FunctionCloner() {    ClonedFunc->replaceAllUsesWith(OrigFunc);    ClonedFunc->eraseFromParent();    if (!IsFunctionInlined) { -    // Remove the function that is speculatively created if there is no +    // Remove each function that was speculatively created if there is no      // reference. -    if (OutlinedFunc) -      OutlinedFunc->eraseFromParent(); +    for (auto FuncBBPair : OutlinedFunctions) { +      Function *Func = FuncBBPair.first; +      Func->eraseFromParent(); +    }    }  } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { +std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {    if (F->hasAddressTaken()) -    return nullptr; +    return {false, nullptr};    // Let inliner handle it    if (F->hasFnAttribute(Attribute::AlwaysInline)) -    return nullptr; +    return {false, nullptr};    if (F->hasFnAttribute(Attribute::NoInline)) -    return nullptr; +    return {false, nullptr};    if (PSI->isFunctionEntryCold(F)) -    return nullptr; +    return {false, nullptr};    if (F->user_begin() == F->user_end()) -    return nullptr; +    return {false, nullptr}; + +  auto &ORE = (*GetORE)(*F); + +  // Only try to outline cold regions if we have a profile summary, which +  // implies we have profiling information. +  if (PSI->hasProfileSummary() && F->getEntryCount().hasValue() && +      !DisableMultiRegionPartialInline) { +    std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI = +        computeOutliningColdRegionsInfo(F); +    if (OMRI) { +      FunctionCloner Cloner(F, OMRI.get(), ORE); + +#ifndef NDEBUG +      if (TracePartialInlining) { +        dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n"; +        dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold() +               << "\n"; +      } +#endif +      bool DidOutline = Cloner.doMultiRegionFunctionOutlining(); + +      if (DidOutline) { +#ifndef NDEBUG +        if (TracePartialInlining) { +          dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n"; +          Cloner.ClonedFunc->print(dbgs()); +          dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n"; +        } +#endif -  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F); +        if (tryPartialInline(Cloner)) +          return {true, nullptr}; +      } +    } +  } +  // Fall-thru to regular partial inlining if we: +  //    i) can't find any cold regions to outline, or +  //   ii) can't inline the outlined function anywhere. +  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);    if (!OI) -    return nullptr; +    return {false, nullptr}; -  FunctionCloner Cloner(F, OI.get()); +  FunctionCloner Cloner(F, OI.get(), ORE);    Cloner.NormalizeReturnBlock(); -  Function *OutlinedFunction = Cloner.doFunctionOutlining(); + +  Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining(); + +  if (!OutlinedFunction) +    return {false, nullptr};    bool AnyInline = tryPartialInline(Cloner);    if (AnyInline) -    return OutlinedFunction; +    return {true, OutlinedFunction}; -  return nullptr; +  return {false, nullptr};  }  bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { -  int NonWeightedRcost; -  int SizeCost; - -  if (Cloner.OutlinedFunc == nullptr) +  if (Cloner.OutlinedFunctions.empty())      return false; +  int SizeCost = 0; +  BlockFrequency WeightedRcost; +  int NonWeightedRcost;    std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner); -  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner); -  auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq; - -  // The call sequence to the outlined function is larger than the original -  // outlined region size, it does not increase the chances of inlining -  // the function with outlining (The inliner usies the size increase to +  // Only calculate RelativeToEntryFreq when we are doing single region +  // outlining. +  BranchProbability RelativeToEntryFreq; +  if (Cloner.ClonedOI) { +    RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner); +  } else +    // RelativeToEntryFreq doesn't make sense when we have more than one +    // outlined call because each call will have a different relative frequency +    // to the entry block.  We can consider using the average, but the +    // usefulness of that information is questionable. For now, assume we never +    // execute the calls to outlined functions. +    RelativeToEntryFreq = BranchProbability(0, 1); + +  WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq; + +  // The call sequence(s) to the outlined function(s) are larger than the sum of +  // the original outlined region size(s), it does not increase the chances of +  // inlining the function with outlining (The inliner uses the size increase to    // model the cost of inlining a callee).    if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) { -    OptimizationRemarkEmitter ORE(Cloner.OrigFunc); +    auto &ORE = (*GetORE)(*Cloner.OrigFunc);      DebugLoc DLoc;      BasicBlock *Block;      std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc); -    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", +    ORE.emit([&]() { +      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",                                          DLoc, Block)               << ore::NV("Function", Cloner.OrigFunc)               << " not partially inlined into callers (Original Size = "               << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)               << ", Size of call sequence to outlined function = " -             << ore::NV("NewSize", SizeCost) << ")"); +             << ore::NV("NewSize", SizeCost) << ")"; +    });      return false;    } @@ -882,18 +1392,26 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {      if (IsLimitReached())        continue; -    OptimizationRemarkEmitter ORE(CS.getCaller()); -    if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE)) +    if (!shouldPartialInline(CS, Cloner, WeightedRcost))        continue; -    ORE.emit( -        OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()) -        << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " -        << ore::NV("Caller", CS.getCaller())); +    auto &ORE = (*GetORE)(*CS.getCaller()); +    // Construct remark before doing the inlining, as after successful inlining +    // the callsite is removed. +    OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()); +    OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " +       << ore::NV("Caller", CS.getCaller());      InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); -    InlineFunction(CS, IFI); +    // We can only forward varargs when we outlined a single region, else we +    // bail on vararg functions. +    if (!InlineFunction(CS, IFI, nullptr, true, +                        (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first +                                         : nullptr))) +      continue; + +    ORE.emit(OR);      // Now update the entry count:      if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) { @@ -904,13 +1422,23 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {      AnyInline = true;      NumPartialInlining++;      // Update the stats -    NumPartialInlined++; +    if (Cloner.ClonedOI) +      NumPartialInlined++; +    else +      NumColdOutlinePartialInlined++; +    }    if (AnyInline) {      Cloner.IsFunctionInlined = true;      if (CalleeEntryCount)        Cloner.OrigFunc->setEntryCount(CalleeEntryCountV); +    auto &ORE = (*GetORE)(*Cloner.OrigFunc); +    ORE.emit([&]() { +      return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc) +             << "Partially inlined into at least one caller"; +    }); +    }    return AnyInline; @@ -944,8 +1472,10 @@ bool PartialInlinerImpl::run(Module &M) {      if (Recursive)        continue; -    if (Function *NewFunc = unswitchFunction(CurrFunc)) { -      Worklist.push_back(NewFunc); +    std::pair<bool, Function * > Result = unswitchFunction(CurrFunc); +    if (Result.second) +      Worklist.push_back(Result.second); +    if (Result.first) {        Changed = true;      }    } @@ -954,6 +1484,7 @@ bool PartialInlinerImpl::run(Module &M) {  }  char PartialInlinerLegacyPass::ID = 0; +  INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",                        "Partial Inliner", false, false)  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) @@ -985,9 +1516,15 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,      return FAM.getResult<TargetIRAnalysis>(F);    }; +  std::function<OptimizationRemarkEmitter &(Function &)> GetORE = +      [&FAM](Function &F) -> OptimizationRemarkEmitter & { +    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); +  }; +    ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); -  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M)) +  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE) +          .run(M))      return PreservedAnalyses::none();    return PreservedAnalyses::all();  } | 
