diff options
Diffstat (limited to 'lib/Transforms/Scalar/JumpThreading.cpp')
| -rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 255 | 
1 files changed, 144 insertions, 111 deletions
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 1d66472f93c80..48de56a02834d 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -25,12 +25,12 @@  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/GuardUtils.h"  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/LazyValueInfo.h"  #include "llvm/Analysis/Loads.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CFG.h" @@ -38,6 +38,7 @@  #include "llvm/IR/ConstantRange.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DomTreeUpdater.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/InstrTypes.h" @@ -65,6 +66,7 @@  #include "llvm/Transforms/Scalar.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/SSAUpdater.h"  #include "llvm/Transforms/Utils/ValueMapper.h"  #include <algorithm> @@ -285,7 +287,7 @@ bool JumpThreading::runOnFunction(Function &F) {    auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();    auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();    auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); -  DeferredDominance DDT(*DT); +  DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);    std::unique_ptr<BlockFrequencyInfo> BFI;    std::unique_ptr<BranchProbabilityInfo> BPI;    bool HasProfileData = F.hasProfileData(); @@ -295,7 +297,7 @@ bool JumpThreading::runOnFunction(Function &F) {      BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));    } -  bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DDT, HasProfileData, +  bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, HasProfileData,                                std::move(BFI), std::move(BPI));    if (PrintLVIAfterJumpThreading) {      dbgs() << "LVI for function '" << F.getName() << "':\n"; @@ -312,7 +314,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,    auto &DT = AM.getResult<DominatorTreeAnalysis>(F);    auto &LVI = AM.getResult<LazyValueAnalysis>(F);    auto &AA = AM.getResult<AAManager>(F); -  DeferredDominance DDT(DT); +  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);    std::unique_ptr<BlockFrequencyInfo> BFI;    std::unique_ptr<BranchProbabilityInfo> BPI; @@ -322,7 +324,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,      BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));    } -  bool Changed = runImpl(F, &TLI, &LVI, &AA, &DDT, HasProfileData, +  bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, HasProfileData,                           std::move(BFI), std::move(BPI));    if (!Changed) @@ -336,14 +338,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,  bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,                                  LazyValueInfo *LVI_, AliasAnalysis *AA_, -                                DeferredDominance *DDT_, bool HasProfileData_, +                                DomTreeUpdater *DTU_, bool HasProfileData_,                                  std::unique_ptr<BlockFrequencyInfo> BFI_,                                  std::unique_ptr<BranchProbabilityInfo> BPI_) {    LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");    TLI = TLI_;    LVI = LVI_;    AA = AA_; -  DDT = DDT_; +  DTU = DTU_;    BFI.reset();    BPI.reset();    // When profile data is available, we need to update edge weights after @@ -360,7 +362,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,    // JumpThreading must not processes blocks unreachable from entry. It's a    // waste of compute time and can potentially lead to hangs.    SmallPtrSet<BasicBlock *, 16> Unreachable; -  DominatorTree &DT = DDT->flush(); +  assert(DTU && "DTU isn't passed into JumpThreading before using it."); +  assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed."); +  DominatorTree &DT = DTU->getDomTree();    for (auto &BB : F)      if (!DT.isReachableFromEntry(&BB))        Unreachable.insert(&BB); @@ -379,7 +383,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,        // Stop processing BB if it's the entry or is now deleted. The following        // routines attempt to eliminate BB and locating a suitable replacement        // for the entry is non-trivial. -      if (&BB == &F.getEntryBlock() || DDT->pendingDeletedBB(&BB)) +      if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))          continue;        if (pred_empty(&BB)) { @@ -390,7 +394,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,                            << '\n');          LoopHeaders.erase(&BB);          LVI->eraseBlock(&BB); -        DeleteDeadBlock(&BB, DDT); +        DeleteDeadBlock(&BB, DTU);          Changed = true;          continue;        } @@ -404,9 +408,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,            // Don't alter Loop headers and latches to ensure another pass can            // detect and transform nested loops later.            !LoopHeaders.count(&BB) && !LoopHeaders.count(BI->getSuccessor(0)) && -          TryToSimplifyUncondBranchFromEmptyBlock(&BB, DDT)) { -        // BB is valid for cleanup here because we passed in DDT. F remains -        // BB's parent until a DDT->flush() event. +          TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) { +        // BB is valid for cleanup here because we passed in DTU. F remains +        // BB's parent until a DTU->getDomTree() event.          LVI->eraseBlock(&BB);          Changed = true;        } @@ -415,7 +419,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,    } while (Changed);    LoopHeaders.clear(); -  DDT->flush(); +  // Flush only the Dominator Tree. +  DTU->getDomTree();    LVI->enableDT();    return EverChanged;  } @@ -569,9 +574,11 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {  /// BB in the result vector.  ///  /// This returns true if there were any known values. -bool JumpThreadingPass::ComputeValueKnownInPredecessors( +bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(      Value *V, BasicBlock *BB, PredValueInfo &Result, -    ConstantPreference Preference, Instruction *CxtI) { +    ConstantPreference Preference, +    DenseSet<std::pair<Value *, BasicBlock *>> &RecursionSet, +    Instruction *CxtI) {    // This method walks up use-def chains recursively.  Because of this, we could    // get into an infinite loop going around loops in the use-def chain.  To    // prevent this, keep track of what (value, block) pairs we've already visited @@ -579,10 +586,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(    if (!RecursionSet.insert(std::make_pair(V, BB)).second)      return false; -  // An RAII help to remove this pair from the recursion set once the recursion -  // stack pops back out again. -  RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB)); -    // If V is a constant, then it is known in all predecessors.    if (Constant *KC = getKnownConstant(V, Preference)) {      for (BasicBlock *Pred : predecessors(BB)) @@ -609,7 +612,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(      // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.      // Perhaps getConstantOnEdge should be smart enough to do this? -    if (DDT->pending()) +    if (DTU->hasPendingDomTreeUpdates())        LVI->disableDT();      else        LVI->enableDT(); @@ -626,7 +629,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(    /// If I is a PHI node, then we know the incoming values for any constants.    if (PHINode *PN = dyn_cast<PHINode>(I)) { -    if (DDT->pending()) +    if (DTU->hasPendingDomTreeUpdates())        LVI->disableDT();      else        LVI->enableDT(); @@ -652,7 +655,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(      Value *Source = CI->getOperand(0);      if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))        return false; -    ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI); +    ComputeValueKnownInPredecessorsImpl(Source, BB, Result, Preference, +                                        RecursionSet, CxtI);      if (Result.empty())        return false; @@ -672,10 +676,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(          I->getOpcode() == Instruction::And) {        PredValueInfoTy LHSVals, RHSVals; -      ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, -                                      WantInteger, CxtI); -      ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals, -                                      WantInteger, CxtI); +      ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals, +                                      WantInteger, RecursionSet, CxtI); +      ComputeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals, +                                          WantInteger, RecursionSet, CxtI);        if (LHSVals.empty() && RHSVals.empty())          return false; @@ -710,8 +714,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(      if (I->getOpcode() == Instruction::Xor &&          isa<ConstantInt>(I->getOperand(1)) &&          cast<ConstantInt>(I->getOperand(1))->isOne()) { -      ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result, -                                      WantInteger, CxtI); +      ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result, +                                          WantInteger, RecursionSet, CxtI);        if (Result.empty())          return false; @@ -728,8 +732,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(              && "A binary operator creating a block address?");      if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {        PredValueInfoTy LHSVals; -      ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals, -                                      WantInteger, CxtI); +      ComputeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals, +                                          WantInteger, RecursionSet, CxtI);        // Try to use constant folding to simplify the binary operator.        for (const auto &LHSVal : LHSVals) { @@ -759,7 +763,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(        const DataLayout &DL = PN->getModule()->getDataLayout();        // We can do this simplification if any comparisons fold to true or false.        // See if any do. -      if (DDT->pending()) +      if (DTU->hasPendingDomTreeUpdates())          LVI->disableDT();        else          LVI->enableDT(); @@ -806,7 +810,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(        if (!isa<Instruction>(CmpLHS) ||            cast<Instruction>(CmpLHS)->getParent() != BB) { -        if (DDT->pending()) +        if (DTU->hasPendingDomTreeUpdates())            LVI->disableDT();          else            LVI->enableDT(); @@ -838,7 +842,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(              match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {            if (!isa<Instruction>(AddLHS) ||                cast<Instruction>(AddLHS)->getParent() != BB) { -            if (DDT->pending()) +            if (DTU->hasPendingDomTreeUpdates())                LVI->disableDT();              else                LVI->enableDT(); @@ -874,8 +878,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(        // Try to find a constant value for the LHS of a comparison,        // and evaluate it statically if we can.        PredValueInfoTy LHSVals; -      ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, -                                      WantInteger, CxtI); +      ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals, +                                          WantInteger, RecursionSet, CxtI);        for (const auto &LHSVal : LHSVals) {          Constant *V = LHSVal.first; @@ -895,8 +899,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(      Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);      PredValueInfoTy Conds;      if ((TrueVal || FalseVal) && -        ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds, -                                        WantInteger, CxtI)) { +        ComputeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds, +                                            WantInteger, RecursionSet, CxtI)) {        for (auto &C : Conds) {          Constant *Cond = C.first; @@ -923,7 +927,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(    }    // If all else fails, see if LVI can figure out a constant value for us. -  if (DDT->pending()) +  if (DTU->hasPendingDomTreeUpdates())      LVI->disableDT();    else      LVI->enableDT(); @@ -942,7 +946,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(  /// Since we can pick an arbitrary destination, we pick the successor with the  /// fewest predecessors.  This should reduce the in-degree of the others.  static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { -  TerminatorInst *BBTerm = BB->getTerminator(); +  Instruction *BBTerm = BB->getTerminator();    unsigned MinSucc = 0;    BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);    // Compute the successor with the minimum number of predecessors. @@ -974,7 +978,7 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) {  bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {    // If the block is trivially dead, just return and let the caller nuke it.    // This simplifies other transformations. -  if (DDT->pendingDeletedBB(BB) || +  if (DTU->isBBPendingDeletion(BB) ||        (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))      return false; @@ -983,15 +987,15 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {    // because now the condition in this block can be threaded through    // predecessors of our predecessor block.    if (BasicBlock *SinglePred = BB->getSinglePredecessor()) { -    const TerminatorInst *TI = SinglePred->getTerminator(); -    if (!TI->isExceptional() && TI->getNumSuccessors() == 1 && +    const Instruction *TI = SinglePred->getTerminator(); +    if (!TI->isExceptionalTerminator() && TI->getNumSuccessors() == 1 &&          SinglePred != BB && !hasAddressTakenAndUsed(BB)) {        // If SinglePred was a loop header, BB becomes one.        if (LoopHeaders.erase(SinglePred))          LoopHeaders.insert(BB);        LVI->eraseBlock(SinglePred); -      MergeBasicBlockIntoOnlyPred(BB, nullptr, DDT); +      MergeBasicBlockIntoOnlyPred(BB, DTU);        // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by        // BB code within one basic block `BB`), we need to invalidate the LVI @@ -1075,7 +1079,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {      std::vector<DominatorTree::UpdateType> Updates;      // Fold the branch/switch. -    TerminatorInst *BBTerm = BB->getTerminator(); +    Instruction *BBTerm = BB->getTerminator();      Updates.reserve(BBTerm->getNumSuccessors());      for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {        if (i == BestSucc) continue; @@ -1088,7 +1092,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {                        << "' folding undef terminator: " << *BBTerm << '\n');      BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);      BBTerm->eraseFromParent(); -    DDT->applyUpdates(Updates); +    DTU->applyUpdates(Updates);      return true;    } @@ -1100,7 +1104,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {                        << "' folding terminator: " << *BB->getTerminator()                        << '\n');      ++NumFolds; -    ConstantFoldTerminator(BB, true, nullptr, DDT); +    ConstantFoldTerminator(BB, true, nullptr, DTU);      return true;    } @@ -1127,7 +1131,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {        // threading is concerned.        assert(CondBr->isConditional() && "Threading on unconditional terminator"); -      if (DDT->pending()) +      if (DTU->hasPendingDomTreeUpdates())          LVI->disableDT();        else          LVI->enableDT(); @@ -1156,7 +1160,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {              ConstantInt::getFalse(CondCmp->getType());            ReplaceFoldableUses(CondCmp, CI);          } -        DDT->deleteEdge(BB, ToRemoveSucc); +        DTU->deleteEdgeRelaxed(BB, ToRemoveSucc);          return true;        } @@ -1167,6 +1171,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {      }    } +  if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) +    TryToUnfoldSelect(SI, BB); +    // Check for some cases that are worth simplifying.  Right now we want to look    // for loads that are used by a switch or by the condition for the branch.  If    // we see one, check to see if it's partially redundant.  If so, insert a PHI @@ -1240,7 +1247,7 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {        RemoveSucc->removePredecessor(BB);        BranchInst::Create(KeepSucc, BI);        BI->eraseFromParent(); -      DDT->deleteEdge(BB, RemoveSucc); +      DTU->deleteEdgeRelaxed(BB, RemoveSucc);        return true;      }      CurrentBB = CurrentPred; @@ -1296,7 +1303,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {      if (IsLoadCSE) {        LoadInst *NLoadI = cast<LoadInst>(AvailableVal); -      combineMetadataForCSE(NLoadI, LoadI); +      combineMetadataForCSE(NLoadI, LoadI, false);      };      // If the returned value is the load itself, replace with an undef. This can @@ -1486,7 +1493,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {    }    for (LoadInst *PredLoadI : CSELoads) { -    combineMetadataForCSE(PredLoadI, LoadI); +    combineMetadataForCSE(PredLoadI, LoadI, true);    }    LoadI->replaceAllUsesWith(PN); @@ -1544,7 +1551,7 @@ FindMostPopularDest(BasicBlock *BB,    // successor list.    if (!SamePopularity.empty()) {      SamePopularity.push_back(MostPopularDest); -    TerminatorInst *TI = BB->getTerminator(); +    Instruction *TI = BB->getTerminator();      for (unsigned i = 0; ; ++i) {        assert(i != TI->getNumSuccessors() && "Didn't find any successor!"); @@ -1664,10 +1671,10 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,        }        // Finally update the terminator. -      TerminatorInst *Term = BB->getTerminator(); +      Instruction *Term = BB->getTerminator();        BranchInst::Create(OnlyDest, Term);        Term->eraseFromParent(); -      DDT->applyUpdates(Updates); +      DTU->applyUpdates(Updates);        // If the condition is now dead due to the removal of the old terminator,        // erase it. @@ -1945,7 +1952,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,                      << "' with cost: " << JumpThreadCost                      << ", across block:\n    " << *BB << "\n"); -  if (DDT->pending()) +  if (DTU->hasPendingDomTreeUpdates())      LVI->disableDT();    else      LVI->enableDT(); @@ -1974,7 +1981,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,    // Clone the non-phi instructions of BB into NewBB, keeping track of the    // mapping and using it to remap operands in the cloned instructions. -  for (; !isa<TerminatorInst>(BI); ++BI) { +  for (; !BI->isTerminator(); ++BI) {      Instruction *New = BI->clone();      New->setName(BI->getName());      NewBB->getInstList().push_back(New); @@ -2001,7 +2008,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,    // Update the terminator of PredBB to jump to NewBB instead of BB.  This    // eliminates predecessors from BB, which requires us to simplify any PHI    // nodes in BB. -  TerminatorInst *PredTerm = PredBB->getTerminator(); +  Instruction *PredTerm = PredBB->getTerminator();    for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)      if (PredTerm->getSuccessor(i) == BB) {        BB->removePredecessor(PredBB, true); @@ -2009,7 +2016,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,      }    // Enqueue required DT updates. -  DDT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}, +  DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},                       {DominatorTree::Insert, PredBB, NewBB},                       {DominatorTree::Delete, PredBB, BB}}); @@ -2105,12 +2112,12 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,        BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());    } -  DDT->applyUpdates(Updates); +  DTU->applyUpdates(Updates);    return NewBBs[0];  }  bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) { -  const TerminatorInst *TI = BB->getTerminator(); +  const Instruction *TI = BB->getTerminator();    assert(TI->getNumSuccessors() > 1 && "not a split");    MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); @@ -2378,12 +2385,78 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(    // Remove the unconditional branch at the end of the PredBB block.    OldPredBranch->eraseFromParent(); -  DDT->applyUpdates(Updates); +  DTU->applyUpdates(Updates);    ++NumDupes;    return true;  } +// Pred is a predecessor of BB with an unconditional branch to BB. SI is +// a Select instruction in Pred. BB has other predecessors and SI is used in +// a PHI node in BB. SI has no other use. +// A new basic block, NewBB, is created and SI is converted to compare and  +// conditional branch. SI is erased from parent. +void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, +                                          SelectInst *SI, PHINode *SIUse, +                                          unsigned Idx) { +  // Expand the select. +  // +  // Pred -- +  //  |    v +  //  |  NewBB +  //  |    | +  //  |----- +  //  v +  // BB +  BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator()); +  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold", +                                         BB->getParent(), BB); +  // Move the unconditional branch to NewBB. +  PredTerm->removeFromParent(); +  NewBB->getInstList().insert(NewBB->end(), PredTerm); +  // Create a conditional branch and update PHI nodes. +  BranchInst::Create(NewBB, BB, SI->getCondition(), Pred); +  SIUse->setIncomingValue(Idx, SI->getFalseValue()); +  SIUse->addIncoming(SI->getTrueValue(), NewBB); + +  // The select is now dead. +  SI->eraseFromParent(); +  DTU->applyUpdates({{DominatorTree::Insert, NewBB, BB}, +                    {DominatorTree::Insert, Pred, NewBB}}); + +  // Update any other PHI nodes in BB. +  for (BasicBlock::iterator BI = BB->begin(); +       PHINode *Phi = dyn_cast<PHINode>(BI); ++BI) +    if (Phi != SIUse) +      Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB); +} + +bool JumpThreadingPass::TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) { +  PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition()); + +  if (!CondPHI || CondPHI->getParent() != BB) +    return false; + +  for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) { +    BasicBlock *Pred = CondPHI->getIncomingBlock(I); +    SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I)); + +    // The second and third condition can be potentially relaxed. Currently +    // the conditions help to simplify the code and allow us to reuse existing +    // code, developed for TryToUnfoldSelect(CmpInst *, BasicBlock *) +    if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse()) +      continue; + +    BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator()); +    if (!PredTerm || !PredTerm->isUnconditional()) +      continue; + +    UnfoldSelectInstr(Pred, BB, PredSI, CondPHI, I); +    return true; +  } +  return false; +} +  /// TryToUnfoldSelect - Look for blocks of the form  /// bb1:  ///   %a = select @@ -2421,7 +2494,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {      // Now check if one of the select values would allow us to constant fold the      // terminator in BB. We don't do the transform if both sides fold, those      // cases will be threaded in any case. -    if (DDT->pending()) +    if (DTU->hasPendingDomTreeUpdates())        LVI->disableDT();      else        LVI->enableDT(); @@ -2434,34 +2507,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {      if ((LHSFolds != LazyValueInfo::Unknown ||           RHSFolds != LazyValueInfo::Unknown) &&          LHSFolds != RHSFolds) { -      // Expand the select. -      // -      // Pred -- -      //  |    v -      //  |  NewBB -      //  |    | -      //  |----- -      //  v -      // BB -      BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold", -                                             BB->getParent(), BB); -      // Move the unconditional branch to NewBB. -      PredTerm->removeFromParent(); -      NewBB->getInstList().insert(NewBB->end(), PredTerm); -      // Create a conditional branch and update PHI nodes. -      BranchInst::Create(NewBB, BB, SI->getCondition(), Pred); -      CondLHS->setIncomingValue(I, SI->getFalseValue()); -      CondLHS->addIncoming(SI->getTrueValue(), NewBB); -      // The select is now dead. -      SI->eraseFromParent(); - -      DDT->applyUpdates({{DominatorTree::Insert, NewBB, BB}, -                         {DominatorTree::Insert, Pred, NewBB}}); -      // Update any other PHI nodes in BB. -      for (BasicBlock::iterator BI = BB->begin(); -           PHINode *Phi = dyn_cast<PHINode>(BI); ++BI) -        if (Phi != CondLHS) -          Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB); +      UnfoldSelectInstr(Pred, BB, SI, CondLHS, I);        return true;      }    } @@ -2533,7 +2579,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {      if (!SI)        continue;      // Expand the select. -    TerminatorInst *Term = +    Instruction *Term =          SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);      BasicBlock *SplitBB = SI->getParent();      BasicBlock *NewBB = Term->getParent(); @@ -2548,12 +2594,12 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {      Updates.push_back({DominatorTree::Insert, BB, SplitBB});      Updates.push_back({DominatorTree::Insert, BB, NewBB});      Updates.push_back({DominatorTree::Insert, NewBB, SplitBB}); -    // BB's successors were moved to SplitBB, update DDT accordingly. +    // BB's successors were moved to SplitBB, update DTU accordingly.      for (auto *Succ : successors(SplitBB)) {        Updates.push_back({DominatorTree::Delete, BB, Succ});        Updates.push_back({DominatorTree::Insert, SplitBB, Succ});      } -    DDT->applyUpdates(Updates); +    DTU->applyUpdates(Updates);      return true;    }    return false; @@ -2603,9 +2649,8 @@ bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {    if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))      for (auto &I : *BB) -      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>())) -        if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI)) -          return true; +      if (isGuard(&I) && ThreadGuard(BB, cast<IntrinsicInst>(&I), BI)) +        return true;    return false;  } @@ -2651,28 +2696,16 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,    // Duplicate all instructions before the guard and the guard itself to the    // branch where implication is not proved.    BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween( -      BB, PredGuardedBlock, AfterGuard, GuardedMapping); +      BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);    assert(GuardedBlock && "Could not create the guarded block?");    // Duplicate all instructions before the guard in the unguarded branch.    // Since we have successfully duplicated the guarded block and this block    // has fewer instructions, we expect it to succeed.    BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween( -      BB, PredUnguardedBlock, Guard, UnguardedMapping); +      BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);    assert(UnguardedBlock && "Could not create the unguarded block?");    LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "                      << GuardedBlock->getName() << "\n"); -  // DuplicateInstructionsInSplitBetween inserts a new block "BB.split" between -  // PredBB and BB. We need to perform two inserts and one delete for each of -  // the above calls to update Dominators. -  DDT->applyUpdates( -      {// Guarded block split. -       {DominatorTree::Delete, PredGuardedBlock, BB}, -       {DominatorTree::Insert, PredGuardedBlock, GuardedBlock}, -       {DominatorTree::Insert, GuardedBlock, BB}, -       // Unguarded block split. -       {DominatorTree::Delete, PredUnguardedBlock, BB}, -       {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock}, -       {DominatorTree::Insert, UnguardedBlock, BB}});    // Some instructions before the guard may still have uses. For them, we need    // to create Phi nodes merging their copies in both guarded and unguarded    // branches. Those instructions that have no uses can be just removed.  | 
