diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopUnswitch.cpp')
| -rw-r--r-- | lib/Transforms/Scalar/LoopUnswitch.cpp | 188 | 
1 files changed, 138 insertions, 50 deletions
| diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index d0c96fa627a4..bd468338a1d0 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1,4 +1,4 @@ -//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===// +//===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===//  //  //                     The LLVM Compiler Infrastructure  // @@ -26,30 +26,40 @@  //  //===----------------------------------------------------------------------===// -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/BranchProbabilityInfo.h"  #include "llvm/Analysis/CodeMetrics.h"  #include "llvm/Analysis/DivergenceAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h"  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/LoopPass.h"  #include "llvm/Analysis/ScalarEvolution.h"  #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h"  #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h" -#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h"  #include "llvm/IR/Module.h" -#include "llvm/Support/BranchProbability.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" @@ -58,9 +68,15 @@  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h"  #include <algorithm> +#include <cassert>  #include <map>  #include <set> +#include <tuple> +#include <utility> +#include <vector> +  using namespace llvm;  #define DEBUG_TYPE "loop-unswitch" @@ -82,11 +98,9 @@ Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),  namespace {    class LUAnalysisCache { - -    typedef DenseMap<const SwitchInst*, SmallPtrSet<const Value *, 8> > -      UnswitchedValsMap; - -    typedef UnswitchedValsMap::iterator UnswitchedValsIt; +    using UnswitchedValsMap = +        DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>; +    using UnswitchedValsIt = UnswitchedValsMap::iterator;      struct LoopProperties {        unsigned CanBeUnswitchedCount; @@ -97,12 +111,12 @@ namespace {      // Here we use std::map instead of DenseMap, since we need to keep valid      // LoopProperties pointer for current loop for better performance. -    typedef std::map<const Loop*, LoopProperties> LoopPropsMap; -    typedef LoopPropsMap::iterator LoopPropsMapIt; +    using LoopPropsMap = std::map<const Loop *, LoopProperties>; +    using LoopPropsMapIt = LoopPropsMap::iterator;      LoopPropsMap LoopsProperties; -    UnswitchedValsMap *CurLoopInstructions; -    LoopProperties *CurrentLoopProperties; +    UnswitchedValsMap *CurLoopInstructions = nullptr; +    LoopProperties *CurrentLoopProperties = nullptr;      // A loop unswitching with an estimated cost above this threshold      // is not performed. MaxSize is turned into unswitching quota for @@ -121,9 +135,7 @@ namespace {      unsigned MaxSize;    public: -    LUAnalysisCache() -        : CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr), -          MaxSize(Threshold) {} +    LUAnalysisCache() : MaxSize(Threshold) {}      // Analyze loop. Check its size, calculate is it possible to unswitch      // it. Returns true if we can unswitch this loop. @@ -164,12 +176,12 @@ namespace {      LUAnalysisCache BranchesInfo;      bool OptimizeForSize; -    bool redoLoop; +    bool redoLoop = false; -    Loop *currentLoop; -    DominatorTree *DT; -    BasicBlock *loopHeader; -    BasicBlock *loopPreheader; +    Loop *currentLoop = nullptr; +    DominatorTree *DT = nullptr; +    BasicBlock *loopHeader = nullptr; +    BasicBlock *loopPreheader = nullptr;      bool SanitizeMemory;      LoopSafetyInfo SafetyInfo; @@ -185,16 +197,17 @@ namespace {    public:      static char ID; // Pass ID, replacement for typeid -    explicit LoopUnswitch(bool Os = false, bool hasBranchDivergence = false) : -      LoopPass(ID), OptimizeForSize(Os), redoLoop(false), -      currentLoop(nullptr), DT(nullptr), loopHeader(nullptr), -      loopPreheader(nullptr), hasBranchDivergence(hasBranchDivergence) { + +    explicit LoopUnswitch(bool Os = false, bool hasBranchDivergence = false) +        : LoopPass(ID), OptimizeForSize(Os), +          hasBranchDivergence(hasBranchDivergence) {          initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); -      } +    }      bool runOnLoop(Loop *L, LPPassManager &LPM) override;      bool processCurrentLoop();      bool isUnreachableDueToPreviousUnswitching(BasicBlock *); +      /// This transformation requires natural loop information & requires that      /// loop preheaders be inserted into the CFG.      /// @@ -207,7 +220,6 @@ namespace {      }    private: -      void releaseMemory() override {        BranchesInfo.forgetLoop(currentLoop);      } @@ -237,7 +249,7 @@ namespace {      void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,                                          BasicBlock *TrueDest,                                          BasicBlock *FalseDest, -                                        Instruction *InsertPt, +                                        BranchInst *OldBranch,                                          TerminatorInst *TI);      void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L); @@ -247,13 +259,13 @@ namespace {      Value *SimplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant,                                             Constant *Val);    }; -} + +} // end anonymous namespace  // Analyze loop. Check its size, calculate is it possible to unswitch  // it. Returns true if we can unswitch this loop.  bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,                                  AssumptionCache *AC) { -    LoopPropsMapIt PropsIt;    bool Inserted;    std::tie(PropsIt, Inserted) = @@ -302,7 +314,6 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,  // Clean all data related to given loop.  void LUAnalysisCache::forgetLoop(const Loop *L) { -    LoopPropsMapIt LIt = LoopsProperties.find(L);    if (LIt != LoopsProperties.end()) { @@ -337,7 +348,6 @@ bool LUAnalysisCache::CostAllowsUnswitching() {  // Note, that new loop data is stored inside the VMap.  void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,                                  const ValueToValueMapTy &VMap) { -    LoopProperties &NewLoopProps = LoopsProperties[NewLoop];    LoopProperties &OldLoopProps = *CurrentLoopProperties;    UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals; @@ -367,6 +377,7 @@ void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,  }  char LoopUnswitch::ID = 0; +  INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",                        false, false)  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) @@ -518,9 +529,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {      Changed |= processCurrentLoop();    } while(redoLoop); -  // FIXME: Reconstruct dom info, because it is not preserved properly. -  if (Changed) -    DT->recalculate(*F);    return Changed;  } @@ -553,6 +561,48 @@ bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) {    return false;  } +/// FIXME: Remove this workaround when freeze related patches are done. +/// LoopUnswitch and Equality propagation in GVN have discrepancy about +/// whether branch on undef/poison has undefine behavior. Here it is to +/// rule out some common cases that we found such discrepancy already +/// causing problems. Detail could be found in PR31652. Note if the +/// func returns true, it is unsafe. But if it is false, it doesn't mean +/// it is necessarily safe. +static bool EqualityPropUnSafe(Value &LoopCond) { +  ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond); +  if (!CI || !CI->isEquality()) +    return false; + +  Value *LHS = CI->getOperand(0); +  Value *RHS = CI->getOperand(1); +  if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) +    return true; + +  auto hasUndefInPHI = [](PHINode &PN) { +    for (Value *Opd : PN.incoming_values()) { +      if (isa<UndefValue>(Opd)) +        return true; +    } +    return false; +  }; +  PHINode *LPHI = dyn_cast<PHINode>(LHS); +  PHINode *RPHI = dyn_cast<PHINode>(RHS); +  if ((LPHI && hasUndefInPHI(*LPHI)) || (RPHI && hasUndefInPHI(*RPHI))) +    return true; + +  auto hasUndefInSelect = [](SelectInst &SI) { +    if (isa<UndefValue>(SI.getTrueValue()) || +        isa<UndefValue>(SI.getFalseValue())) +      return true; +    return false; +  }; +  SelectInst *LSI = dyn_cast<SelectInst>(LHS); +  SelectInst *RSI = dyn_cast<SelectInst>(RHS); +  if ((LSI && hasUndefInSelect(*LSI)) || (RSI && hasUndefInSelect(*RSI))) +    return true; +  return false; +} +  /// Do actual work and unswitch loop if possible and profitable.  bool LoopUnswitch::processCurrentLoop() {    bool Changed = false; @@ -666,7 +716,7 @@ bool LoopUnswitch::processCurrentLoop() {          // unswitch on it if we desire.          Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),                                                 currentLoop, Changed).first; -        if (LoopCond && +        if (LoopCond && !EqualityPropUnSafe(*LoopCond) &&              UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {            ++NumBranches;            return true; @@ -831,7 +881,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,  /// mapping the blocks with the specified map.  static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,                         LoopInfo *LI, LPPassManager *LPM) { -  Loop &New = *new Loop(); +  Loop &New = *LI->AllocateLoop();    if (PL)      PL->addChildLoop(&New);    else @@ -852,31 +902,59 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,  }  /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, -/// otherwise branch to FalseDest. Insert the code immediately before InsertPt. +/// otherwise branch to FalseDest. Insert the code immediately before OldBranch +/// and remove (but not erase!) it from the function.  void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,                                                    BasicBlock *TrueDest,                                                    BasicBlock *FalseDest, -                                                  Instruction *InsertPt, +                                                  BranchInst *OldBranch,                                                    TerminatorInst *TI) { +  assert(OldBranch->isUnconditional() && "Preheader is not split correctly");    // Insert a conditional branch on LIC to the two preheaders.  The original    // code is the true version and the new code is the false version.    Value *BranchVal = LIC;    bool Swapped = false;    if (!isa<ConstantInt>(Val) ||        Val->getType() != Type::getInt1Ty(LIC->getContext())) -    BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val); +    BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);    else if (Val != ConstantInt::getTrue(Val->getContext())) {      // We want to enter the new loop when the condition is true.      std::swap(TrueDest, FalseDest);      Swapped = true;    } +  // Old branch will be removed, so save its parent and successor to update the +  // DomTree. +  auto *OldBranchSucc = OldBranch->getSuccessor(0); +  auto *OldBranchParent = OldBranch->getParent(); +    // Insert the new branch.    BranchInst *BI = -      IRBuilder<>(InsertPt).CreateCondBr(BranchVal, TrueDest, FalseDest, TI); +      IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI);    if (Swapped)      BI->swapProfMetadata(); +  // Remove the old branch so there is only one branch at the end. This is +  // needed to perform DomTree's internal DFS walk on the function's CFG. +  OldBranch->removeFromParent(); + +  // Inform the DT about the new branch. +  if (DT) { +    // First, add both successors. +    SmallVector<DominatorTree::UpdateType, 3> Updates; +    if (TrueDest != OldBranchParent) +      Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest}); +    if (FalseDest != OldBranchParent) +      Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest}); +    // If both of the new successors are different from the old one, inform the +    // DT that the edge was deleted. +    if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) { +      Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc}); +    } + +    DT->applyUpdates(Updates); +  } +    // If either edge is critical, split it. This helps preserve LoopSimplify    // form for enclosing loops.    auto Options = CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA(); @@ -916,10 +994,14 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,    // Okay, now we have a position to branch from and a position to branch to,    // insert the new conditional branch. -  EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, -                                 loopPreheader->getTerminator(), TI); -  LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L); -  loopPreheader->getTerminator()->eraseFromParent(); +  auto *OldBranch = dyn_cast<BranchInst>(loopPreheader->getTerminator()); +  assert(OldBranch && "Failed to split the preheader"); +  EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI); +  LPM->deleteSimpleAnalysisValue(OldBranch, L); + +  // EmitPreheaderBranchOnCondition removed the OldBranch from the function. +  // Delete it, as it is no longer needed. +  delete OldBranch;    // We need to reprocess this loop, it could be unswitched again.    redoLoop = true; @@ -1035,6 +1117,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {      if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))        return false;   // Can't handle this. +    if (EqualityPropUnSafe(*LoopCond)) +      return false; +      UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,                               CurrentTerm);      ++NumBranches; @@ -1231,7 +1316,10 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,    EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,                                   TI);    LPM->deleteSimpleAnalysisValue(OldBR, L); -  OldBR->eraseFromParent(); + +  // The OldBr was replaced by a new one and removed (but not erased) by +  // EmitPreheaderBranchOnCondition. It is no longer needed, so delete it. +  delete OldBR;    LoopProcessWorklist.push_back(NewLoop);    redoLoop = true; | 
