diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp | 182 | 
1 files changed, 117 insertions, 65 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp index 30d959704745..1316919e65da 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp @@ -10,6 +10,7 @@  //  //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectOptimize.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -39,7 +40,6 @@  #include <memory>  #include <queue>  #include <stack> -#include <string>  using namespace llvm; @@ -97,36 +97,22 @@ static cl::opt<bool>  namespace { -class SelectOptimize : public FunctionPass { +class SelectOptimizeImpl {    const TargetMachine *TM = nullptr;    const TargetSubtargetInfo *TSI = nullptr;    const TargetLowering *TLI = nullptr;    const TargetTransformInfo *TTI = nullptr;    const LoopInfo *LI = nullptr; -  DominatorTree *DT = nullptr; -  std::unique_ptr<BlockFrequencyInfo> BFI; -  std::unique_ptr<BranchProbabilityInfo> BPI; +  BlockFrequencyInfo *BFI;    ProfileSummaryInfo *PSI = nullptr;    OptimizationRemarkEmitter *ORE = nullptr;    TargetSchedModel TSchedModel;  public: -  static char ID; - -  SelectOptimize() : FunctionPass(ID) { -    initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); -  } - -  bool runOnFunction(Function &F) override; - -  void getAnalysisUsage(AnalysisUsage &AU) const override { -    AU.addRequired<ProfileSummaryInfoWrapperPass>(); -    AU.addRequired<TargetPassConfig>(); -    AU.addRequired<TargetTransformInfoWrapperPass>(); -    AU.addRequired<DominatorTreeWrapperPass>(); -    AU.addRequired<LoopInfoWrapperPass>(); -    AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); -  } +  SelectOptimizeImpl() = default; +  SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){}; +  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +  bool runOnFunction(Function &F, Pass &P);  private:    // Select groups consist of consecutive select instructions with the same @@ -212,29 +198,94 @@ private:    // Returns true if the target architecture supports lowering a given select.    bool isSelectKindSupported(SelectInst *SI);  }; + +class SelectOptimize : public FunctionPass { +  SelectOptimizeImpl Impl; + +public: +  static char ID; + +  SelectOptimize() : FunctionPass(ID) { +    initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); +  } + +  bool runOnFunction(Function &F) override { +    return Impl.runOnFunction(F, *this); +  } + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<ProfileSummaryInfoWrapperPass>(); +    AU.addRequired<TargetPassConfig>(); +    AU.addRequired<TargetTransformInfoWrapperPass>(); +    AU.addRequired<LoopInfoWrapperPass>(); +    AU.addRequired<BlockFrequencyInfoWrapperPass>(); +    AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); +  } +}; +  } // namespace +PreservedAnalyses SelectOptimizePass::run(Function &F, +                                          FunctionAnalysisManager &FAM) { +  SelectOptimizeImpl Impl(TM); +  return Impl.run(F, FAM); +} +  char SelectOptimize::ID = 0;  INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,                        false)  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)  INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)  INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)  INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)  INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)  INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,                      false)  FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } -bool SelectOptimize::runOnFunction(Function &F) { -  TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); +PreservedAnalyses SelectOptimizeImpl::run(Function &F, +                                          FunctionAnalysisManager &FAM) { +  TSI = TM->getSubtargetImpl(F); +  TLI = TSI->getTargetLowering(); + +  // If none of the select types are supported then skip this pass. +  // This is an optimization pass. Legality issues will be handled by +  // instruction selection. +  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && +      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && +      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) +    return PreservedAnalyses::all(); + +  TTI = &FAM.getResult<TargetIRAnalysis>(F); +  if (!TTI->enableSelectOptimize()) +    return PreservedAnalyses::all(); + +  PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F) +            .getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); +  assert(PSI && "This pass requires module analysis pass `profile-summary`!"); +  BFI = &FAM.getResult<BlockFrequencyAnalysis>(F); + +  // When optimizing for size, selects are preferable over branches. +  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI)) +    return PreservedAnalyses::all(); + +  LI = &FAM.getResult<LoopAnalysis>(F); +  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); +  TSchedModel.init(TSI); + +  bool Changed = optimizeSelects(F); +  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) { +  TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();    TSI = TM->getSubtargetImpl(F);    TLI = TSI->getTargetLowering(); -  // If none of the select types is supported then skip this pass. +  // If none of the select types are supported then skip this pass.    // This is an optimization pass. Legality issues will be handled by    // instruction selection.    if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && @@ -242,27 +293,25 @@ bool SelectOptimize::runOnFunction(Function &F) {        !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))      return false; -  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); +  TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);    if (!TTI->enableSelectOptimize())      return false; -  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); -  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); -  BPI.reset(new BranchProbabilityInfo(F, *LI)); -  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); -  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); -  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); +  LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); +  BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); +  PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); +  ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();    TSchedModel.init(TSI);    // When optimizing for size, selects are preferable over branches. -  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) +  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))      return false;    return optimizeSelects(F);  } -bool SelectOptimize::optimizeSelects(Function &F) { +bool SelectOptimizeImpl::optimizeSelects(Function &F) {    // Determine for which select groups it is profitable converting to branches.    SelectGroups ProfSIGroups;    // Base heuristics apply only to non-loops and outer loops. @@ -278,8 +327,8 @@ bool SelectOptimize::optimizeSelects(Function &F) {    return !ProfSIGroups.empty();  } -void SelectOptimize::optimizeSelectsBase(Function &F, -                                         SelectGroups &ProfSIGroups) { +void SelectOptimizeImpl::optimizeSelectsBase(Function &F, +                                             SelectGroups &ProfSIGroups) {    // Collect all the select groups.    SelectGroups SIGroups;    for (BasicBlock &BB : F) { @@ -294,8 +343,8 @@ void SelectOptimize::optimizeSelectsBase(Function &F,    findProfitableSIGroupsBase(SIGroups, ProfSIGroups);  } -void SelectOptimize::optimizeSelectsInnerLoops(Function &F, -                                               SelectGroups &ProfSIGroups) { +void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F, +                                                   SelectGroups &ProfSIGroups) {    SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());    // Need to check size on each iteration as we accumulate child loops.    for (unsigned long i = 0; i < Loops.size(); ++i) @@ -332,7 +381,7 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue,    return V;  } -void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { +void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {    for (SelectGroup &ASI : ProfSIGroups) {      // The code transformation here is a modified version of the sinking      // transformation in CodeGenPrepare::optimizeSelectInst with a more @@ -425,7 +474,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {      BasicBlock *StartBlock = SI->getParent();      BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));      BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); -    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); +    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));      // Delete the unconditional branch that was just created by the split.      StartBlock->getTerminator()->eraseFromParent(); @@ -439,7 +488,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {        DIt++;      }      for (auto *DI : DebugPseudoINS) { -      DI->moveBefore(&*EndBlock->getFirstInsertionPt()); +      DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt());      }      // These are the new basic blocks for the conditional branch. @@ -505,7 +554,8 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {      for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {        SelectInst *SI = *It;        // The select itself is replaced with a PHI Node. -      PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); +      PHINode *PN = PHINode::Create(SI->getType(), 2, ""); +      PN->insertBefore(EndBlock->begin());        PN->takeName(SI);        PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);        PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); @@ -531,8 +581,8 @@ static bool isSpecialSelect(SelectInst *SI) {    return false;  } -void SelectOptimize::collectSelectGroups(BasicBlock &BB, -                                         SelectGroups &SIGroups) { +void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, +                                             SelectGroups &SIGroups) {    BasicBlock::iterator BBIt = BB.begin();    while (BBIt != BB.end()) {      Instruction *I = &*BBIt++; @@ -565,8 +615,8 @@ void SelectOptimize::collectSelectGroups(BasicBlock &BB,    }  } -void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, -                                                SelectGroups &ProfSIGroups) { +void SelectOptimizeImpl::findProfitableSIGroupsBase( +    SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {    for (SelectGroup &ASI : SIGroups) {      ++NumSelectOptAnalyzed;      if (isConvertToBranchProfitableBase(ASI)) @@ -580,14 +630,14 @@ static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,    ORE->emit(Rem);  } -void SelectOptimize::findProfitableSIGroupsInnerLoops( +void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(      const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {    NumSelectOptAnalyzed += SIGroups.size();    // For each select group in an inner-most loop,    // a branch is more preferable than a select/conditional-move if:    // i) conversion to branches for all the select groups of the loop satisfies    //    loop-level heuristics including reducing the loop's critical path by -  //    some threshold (see SelectOptimize::checkLoopHeuristics); and +  //    some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and    // ii) the total cost of the select group is cheaper with a branch compared    //     to its predicated version. The cost is in terms of latency and the cost    //     of a select group is the cost of its most expensive select instruction @@ -627,7 +677,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(    }  } -bool SelectOptimize::isConvertToBranchProfitableBase( +bool SelectOptimizeImpl::isConvertToBranchProfitableBase(      const SmallVector<SelectInst *, 2> &ASI) {    SelectInst *SI = ASI.front();    LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n"); @@ -635,7 +685,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(    OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);    // Skip cold basic blocks. Better to optimize for size for cold blocks. -  if (PSI->isColdBlock(SI->getParent(), BFI.get())) { +  if (PSI->isColdBlock(SI->getParent(), BFI)) {      ++NumSelectColdBB;      ORmiss << "Not converted to branch because of cold basic block. ";      EmitAndPrintRemark(ORE, ORmiss); @@ -678,7 +728,7 @@ static InstructionCost divideNearest(InstructionCost Numerator,    return (Numerator + (Denominator / 2)) / Denominator;  } -bool SelectOptimize::hasExpensiveColdOperand( +bool SelectOptimizeImpl::hasExpensiveColdOperand(      const SmallVector<SelectInst *, 2> &ASI) {    bool ColdOperand = false;    uint64_t TrueWeight, FalseWeight, TotalWeight; @@ -752,9 +802,10 @@ static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {  // (sufficiently-accurate in practice), we populate this set with the  // instructions of the backwards dependence slice that only have one-use and  // form an one-use chain that leads to the source instruction. -void SelectOptimize::getExclBackwardsSlice(Instruction *I, -                                           std::stack<Instruction *> &Slice, -                                           Instruction *SI, bool ForSinking) { +void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I, +                                               std::stack<Instruction *> &Slice, +                                               Instruction *SI, +                                               bool ForSinking) {    SmallPtrSet<Instruction *, 2> Visited;    std::queue<Instruction *> Worklist;    Worklist.push(I); @@ -798,7 +849,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,    }  } -bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { +bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectInst *SI) {    uint64_t TrueWeight, FalseWeight;    if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {      uint64_t Max = std::max(TrueWeight, FalseWeight); @@ -812,8 +863,8 @@ bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {    return false;  } -bool SelectOptimize::checkLoopHeuristics(const Loop *L, -                                         const CostInfo LoopCost[2]) { +bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L, +                                             const CostInfo LoopCost[2]) {    // Loop-level checks to determine if a non-predicated version (with branches)    // of the loop is more profitable than its predicated version. @@ -881,7 +932,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,  // and non-predicated version of the given loop.  // Returns false if unable to compute these costs due to invalid cost of loop  // instruction(s). -bool SelectOptimize::computeLoopCosts( +bool SelectOptimizeImpl::computeLoopCosts(      const Loop *L, const SelectGroups &SIGroups,      DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {    LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop " @@ -969,7 +1020,7 @@ bool SelectOptimize::computeLoopCosts(  }  SmallPtrSet<const Instruction *, 2> -SelectOptimize::getSIset(const SelectGroups &SIGroups) { +SelectOptimizeImpl::getSIset(const SelectGroups &SIGroups) {    SmallPtrSet<const Instruction *, 2> SIset;    for (const SelectGroup &ASI : SIGroups)      for (const SelectInst *SI : ASI) @@ -977,7 +1028,8 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) {    return SIset;  } -std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) { +std::optional<uint64_t> +SelectOptimizeImpl::computeInstCost(const Instruction *I) {    InstructionCost ICost =        TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);    if (auto OC = ICost.getValue()) @@ -986,8 +1038,8 @@ std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {  }  ScaledNumber<uint64_t> -SelectOptimize::getMispredictionCost(const SelectInst *SI, -                                     const Scaled64 CondCost) { +SelectOptimizeImpl::getMispredictionCost(const SelectInst *SI, +                                         const Scaled64 CondCost) {    uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;    // Account for the default misprediction rate when using a branch @@ -1012,8 +1064,8 @@ SelectOptimize::getMispredictionCost(const SelectInst *SI,  // Returns the cost of a branch when the prediction is correct.  // TrueCost * TrueProbability + FalseCost * FalseProbability.  ScaledNumber<uint64_t> -SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, -                                     const SelectInst *SI) { +SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, +                                         const SelectInst *SI) {    Scaled64 PredPathCost;    uint64_t TrueWeight, FalseWeight;    if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { @@ -1033,7 +1085,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,    return PredPathCost;  } -bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { +bool SelectOptimizeImpl::isSelectKindSupported(SelectInst *SI) {    bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);    if (VectorCond)      return false;  | 
