diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Transforms/Scalar/JumpThreading.cpp | |
parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) |
Notes
Diffstat (limited to 'lib/Transforms/Scalar/JumpThreading.cpp')
-rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 255 |
1 files changed, 144 insertions, 111 deletions
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 1d66472f93c8..48de56a02834 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -25,12 +25,12 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -38,6 +38,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -65,6 +66,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> @@ -285,7 +287,7 @@ bool JumpThreading::runOnFunction(Function &F) { auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - DeferredDominance DDT(*DT); + DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy); std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; bool HasProfileData = F.hasProfileData(); @@ -295,7 +297,7 @@ bool JumpThreading::runOnFunction(Function &F) { BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DDT, HasProfileData, + bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, HasProfileData, std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { dbgs() << "LVI for function '" << F.getName() << "':\n"; @@ -312,7 +314,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &LVI = AM.getResult<LazyValueAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); - DeferredDominance DDT(DT); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; @@ -322,7 +324,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = runImpl(F, &TLI, &LVI, &AA, &DDT, HasProfileData, + bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, HasProfileData, std::move(BFI), std::move(BPI)); if (!Changed) @@ -336,14 +338,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, AliasAnalysis *AA_, - DeferredDominance *DDT_, bool HasProfileData_, + DomTreeUpdater *DTU_, bool HasProfileData_, std::unique_ptr<BlockFrequencyInfo> BFI_, std::unique_ptr<BranchProbabilityInfo> BPI_) { LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; LVI = LVI_; AA = AA_; - DDT = DDT_; + DTU = DTU_; BFI.reset(); BPI.reset(); // When profile data is available, we need to update edge weights after @@ -360,7 +362,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // JumpThreading must not processes blocks unreachable from entry. It's a // waste of compute time and can potentially lead to hangs. SmallPtrSet<BasicBlock *, 16> Unreachable; - DominatorTree &DT = DDT->flush(); + assert(DTU && "DTU isn't passed into JumpThreading before using it."); + assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed."); + DominatorTree &DT = DTU->getDomTree(); for (auto &BB : F) if (!DT.isReachableFromEntry(&BB)) Unreachable.insert(&BB); @@ -379,7 +383,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // Stop processing BB if it's the entry or is now deleted. The following // routines attempt to eliminate BB and locating a suitable replacement // for the entry is non-trivial. - if (&BB == &F.getEntryBlock() || DDT->pendingDeletedBB(&BB)) + if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB)) continue; if (pred_empty(&BB)) { @@ -390,7 +394,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, << '\n'); LoopHeaders.erase(&BB); LVI->eraseBlock(&BB); - DeleteDeadBlock(&BB, DDT); + DeleteDeadBlock(&BB, DTU); Changed = true; continue; } @@ -404,9 +408,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // Don't alter Loop headers and latches to ensure another pass can // detect and transform nested loops later. !LoopHeaders.count(&BB) && !LoopHeaders.count(BI->getSuccessor(0)) && - TryToSimplifyUncondBranchFromEmptyBlock(&BB, DDT)) { - // BB is valid for cleanup here because we passed in DDT. F remains - // BB's parent until a DDT->flush() event. + TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) { + // BB is valid for cleanup here because we passed in DTU. F remains + // BB's parent until a DTU->getDomTree() event. LVI->eraseBlock(&BB); Changed = true; } @@ -415,7 +419,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, } while (Changed); LoopHeaders.clear(); - DDT->flush(); + // Flush only the Dominator Tree. + DTU->getDomTree(); LVI->enableDT(); return EverChanged; } @@ -569,9 +574,11 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) { /// BB in the result vector. /// /// This returns true if there were any known values. -bool JumpThreadingPass::ComputeValueKnownInPredecessors( +bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( Value *V, BasicBlock *BB, PredValueInfo &Result, - ConstantPreference Preference, Instruction *CxtI) { + ConstantPreference Preference, + DenseSet<std::pair<Value *, BasicBlock *>> &RecursionSet, + Instruction *CxtI) { // This method walks up use-def chains recursively. Because of this, we could // get into an infinite loop going around loops in the use-def chain. To // prevent this, keep track of what (value, block) pairs we've already visited @@ -579,10 +586,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( if (!RecursionSet.insert(std::make_pair(V, BB)).second) return false; - // An RAII help to remove this pair from the recursion set once the recursion - // stack pops back out again. - RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB)); - // If V is a constant, then it is known in all predecessors. if (Constant *KC = getKnownConstant(V, Preference)) { for (BasicBlock *Pred : predecessors(BB)) @@ -609,7 +612,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. // Perhaps getConstantOnEdge should be smart enough to do this? - if (DDT->pending()) + if (DTU->hasPendingDomTreeUpdates()) LVI->disableDT(); else LVI->enableDT(); @@ -626,7 +629,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( /// If I is a PHI node, then we know the incoming values for any constants. if (PHINode *PN = dyn_cast<PHINode>(I)) { - if (DDT->pending()) + if (DTU->hasPendingDomTreeUpdates()) LVI->disableDT(); else LVI->enableDT(); @@ -652,7 +655,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( Value *Source = CI->getOperand(0); if (!isa<PHINode>(Source) && !isa<CmpInst>(Source)) return false; - ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI); + ComputeValueKnownInPredecessorsImpl(Source, BB, Result, Preference, + RecursionSet, CxtI); if (Result.empty()) return false; @@ -672,10 +676,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( I->getOpcode() == Instruction::And) { PredValueInfoTy LHSVals, RHSVals; - ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, - WantInteger, CxtI); - ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals, - WantInteger, CxtI); + ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals, + WantInteger, RecursionSet, CxtI); + ComputeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals, + WantInteger, RecursionSet, CxtI); if (LHSVals.empty() && RHSVals.empty()) return false; @@ -710,8 +714,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( if (I->getOpcode() == Instruction::Xor && isa<ConstantInt>(I->getOperand(1)) && cast<ConstantInt>(I->getOperand(1))->isOne()) { - ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result, - WantInteger, CxtI); + ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result, + WantInteger, RecursionSet, CxtI); if (Result.empty()) return false; @@ -728,8 +732,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( && "A binary operator creating a block address?"); if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { PredValueInfoTy LHSVals; - ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals, - WantInteger, CxtI); + ComputeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals, + WantInteger, RecursionSet, CxtI); // Try to use constant folding to simplify the binary operator. for (const auto &LHSVal : LHSVals) { @@ -759,7 +763,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( const DataLayout &DL = PN->getModule()->getDataLayout(); // We can do this simplification if any comparisons fold to true or false. // See if any do. - if (DDT->pending()) + if (DTU->hasPendingDomTreeUpdates()) LVI->disableDT(); else LVI->enableDT(); @@ -806,7 +810,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( if (!isa<Instruction>(CmpLHS) || cast<Instruction>(CmpLHS)->getParent() != BB) { - if (DDT->pending()) + if (DTU->hasPendingDomTreeUpdates()) LVI->disableDT(); else LVI->enableDT(); @@ -838,7 +842,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) { if (!isa<Instruction>(AddLHS) || cast<Instruction>(AddLHS)->getParent() != BB) { - if (DDT->pending()) + if (DTU->hasPendingDomTreeUpdates()) LVI->disableDT(); else LVI->enableDT(); @@ -874,8 +878,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // Try to find a constant value for the LHS of a comparison, // and evaluate it statically if we can. PredValueInfoTy LHSVals; - ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, - WantInteger, CxtI); + ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals, + WantInteger, RecursionSet, CxtI); for (const auto &LHSVal : LHSVals) { Constant *V = LHSVal.first; @@ -895,8 +899,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference); PredValueInfoTy Conds; if ((TrueVal || FalseVal) && - ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds, - WantInteger, CxtI)) { + ComputeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds, + WantInteger, RecursionSet, CxtI)) { for (auto &C : Conds) { Constant *Cond = C.first; @@ -923,7 +927,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( } // If all else fails, see if LVI can figure out a constant value for us. - if (DDT->pending()) + if (DTU->hasPendingDomTreeUpdates()) LVI->disableDT(); else LVI->enableDT(); @@ -942,7 +946,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( /// Since we can pick an arbitrary destination, we pick the successor with the /// fewest predecessors. This should reduce the in-degree of the others. static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { - TerminatorInst *BBTerm = BB->getTerminator(); + Instruction *BBTerm = BB->getTerminator(); unsigned MinSucc = 0; BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); // Compute the successor with the minimum number of predecessors. @@ -974,7 +978,7 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) { bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // If the block is trivially dead, just return and let the caller nuke it. // This simplifies other transformations. - if (DDT->pendingDeletedBB(BB) || + if (DTU->isBBPendingDeletion(BB) || (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock())) return false; @@ -983,15 +987,15 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // because now the condition in this block can be threaded through // predecessors of our predecessor block. if (BasicBlock *SinglePred = BB->getSinglePredecessor()) { - const TerminatorInst *TI = SinglePred->getTerminator(); - if (!TI->isExceptional() && TI->getNumSuccessors() == 1 && + const Instruction *TI = SinglePred->getTerminator(); + if (!TI->isExceptionalTerminator() && TI->getNumSuccessors() == 1 && SinglePred != BB && !hasAddressTakenAndUsed(BB)) { // If SinglePred was a loop header, BB becomes one. if (LoopHeaders.erase(SinglePred)) LoopHeaders.insert(BB); LVI->eraseBlock(SinglePred); - MergeBasicBlockIntoOnlyPred(BB, nullptr, DDT); + MergeBasicBlockIntoOnlyPred(BB, DTU); // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by // BB code within one basic block `BB`), we need to invalidate the LVI @@ -1075,7 +1079,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { std::vector<DominatorTree::UpdateType> Updates; // Fold the branch/switch. - TerminatorInst *BBTerm = BB->getTerminator(); + Instruction *BBTerm = BB->getTerminator(); Updates.reserve(BBTerm->getNumSuccessors()); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { if (i == BestSucc) continue; @@ -1088,7 +1092,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { << "' folding undef terminator: " << *BBTerm << '\n'); BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); - DDT->applyUpdates(Updates); + DTU->applyUpdates(Updates); return true; } @@ -1100,7 +1104,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { << "' folding terminator: " << *BB->getTerminator() << '\n'); ++NumFolds; - ConstantFoldTerminator(BB, true, nullptr, DDT); + ConstantFoldTerminator(BB, true, nullptr, DTU); return true; } @@ -1127,7 +1131,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // threading is concerned. assert(CondBr->isConditional() && "Threading on unconditional terminator"); - if (DDT->pending()) + if (DTU->hasPendingDomTreeUpdates()) LVI->disableDT(); else LVI->enableDT(); @@ -1156,7 +1160,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { ConstantInt::getFalse(CondCmp->getType()); ReplaceFoldableUses(CondCmp, CI); } - DDT->deleteEdge(BB, ToRemoveSucc); + DTU->deleteEdgeRelaxed(BB, ToRemoveSucc); return true; } @@ -1167,6 +1171,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { } } + if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) + TryToUnfoldSelect(SI, BB); + // Check for some cases that are worth simplifying. Right now we want to look // for loads that are used by a switch or by the condition for the branch. If // we see one, check to see if it's partially redundant. If so, insert a PHI @@ -1240,7 +1247,7 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) { RemoveSucc->removePredecessor(BB); BranchInst::Create(KeepSucc, BI); BI->eraseFromParent(); - DDT->deleteEdge(BB, RemoveSucc); + DTU->deleteEdgeRelaxed(BB, RemoveSucc); return true; } CurrentBB = CurrentPred; @@ -1296,7 +1303,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) { if (IsLoadCSE) { LoadInst *NLoadI = cast<LoadInst>(AvailableVal); - combineMetadataForCSE(NLoadI, LoadI); + combineMetadataForCSE(NLoadI, LoadI, false); }; // If the returned value is the load itself, replace with an undef. This can @@ -1486,7 +1493,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) { } for (LoadInst *PredLoadI : CSELoads) { - combineMetadataForCSE(PredLoadI, LoadI); + combineMetadataForCSE(PredLoadI, LoadI, true); } LoadI->replaceAllUsesWith(PN); @@ -1544,7 +1551,7 @@ FindMostPopularDest(BasicBlock *BB, // successor list. if (!SamePopularity.empty()) { SamePopularity.push_back(MostPopularDest); - TerminatorInst *TI = BB->getTerminator(); + Instruction *TI = BB->getTerminator(); for (unsigned i = 0; ; ++i) { assert(i != TI->getNumSuccessors() && "Didn't find any successor!"); @@ -1664,10 +1671,10 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, } // Finally update the terminator. - TerminatorInst *Term = BB->getTerminator(); + Instruction *Term = BB->getTerminator(); BranchInst::Create(OnlyDest, Term); Term->eraseFromParent(); - DDT->applyUpdates(Updates); + DTU->applyUpdates(Updates); // If the condition is now dead due to the removal of the old terminator, // erase it. @@ -1945,7 +1952,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, << "' with cost: " << JumpThreadCost << ", across block:\n " << *BB << "\n"); - if (DDT->pending()) + if (DTU->hasPendingDomTreeUpdates()) LVI->disableDT(); else LVI->enableDT(); @@ -1974,7 +1981,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, // Clone the non-phi instructions of BB into NewBB, keeping track of the // mapping and using it to remap operands in the cloned instructions. - for (; !isa<TerminatorInst>(BI); ++BI) { + for (; !BI->isTerminator(); ++BI) { Instruction *New = BI->clone(); New->setName(BI->getName()); NewBB->getInstList().push_back(New); @@ -2001,7 +2008,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, // Update the terminator of PredBB to jump to NewBB instead of BB. This // eliminates predecessors from BB, which requires us to simplify any PHI // nodes in BB. - TerminatorInst *PredTerm = PredBB->getTerminator(); + Instruction *PredTerm = PredBB->getTerminator(); for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) if (PredTerm->getSuccessor(i) == BB) { BB->removePredecessor(PredBB, true); @@ -2009,7 +2016,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, } // Enqueue required DT updates. - DDT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}, + DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}, {DominatorTree::Insert, PredBB, NewBB}, {DominatorTree::Delete, PredBB, BB}}); @@ -2105,12 +2112,12 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB, BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency()); } - DDT->applyUpdates(Updates); + DTU->applyUpdates(Updates); return NewBBs[0]; } bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) { - const TerminatorInst *TI = BB->getTerminator(); + const Instruction *TI = BB->getTerminator(); assert(TI->getNumSuccessors() > 1 && "not a split"); MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); @@ -2378,12 +2385,78 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // Remove the unconditional branch at the end of the PredBB block. OldPredBranch->eraseFromParent(); - DDT->applyUpdates(Updates); + DTU->applyUpdates(Updates); ++NumDupes; return true; } +// Pred is a predecessor of BB with an unconditional branch to BB. SI is +// a Select instruction in Pred. BB has other predecessors and SI is used in +// a PHI node in BB. SI has no other use. +// A new basic block, NewBB, is created and SI is converted to compare and +// conditional branch. SI is erased from parent. +void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, + SelectInst *SI, PHINode *SIUse, + unsigned Idx) { + // Expand the select. + // + // Pred -- + // | v + // | NewBB + // | | + // |----- + // v + // BB + BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator()); + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold", + BB->getParent(), BB); + // Move the unconditional branch to NewBB. + PredTerm->removeFromParent(); + NewBB->getInstList().insert(NewBB->end(), PredTerm); + // Create a conditional branch and update PHI nodes. + BranchInst::Create(NewBB, BB, SI->getCondition(), Pred); + SIUse->setIncomingValue(Idx, SI->getFalseValue()); + SIUse->addIncoming(SI->getTrueValue(), NewBB); + + // The select is now dead. + SI->eraseFromParent(); + DTU->applyUpdates({{DominatorTree::Insert, NewBB, BB}, + {DominatorTree::Insert, Pred, NewBB}}); + + // Update any other PHI nodes in BB. + for (BasicBlock::iterator BI = BB->begin(); + PHINode *Phi = dyn_cast<PHINode>(BI); ++BI) + if (Phi != SIUse) + Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB); +} + +bool JumpThreadingPass::TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) { + PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition()); + + if (!CondPHI || CondPHI->getParent() != BB) + return false; + + for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) { + BasicBlock *Pred = CondPHI->getIncomingBlock(I); + SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I)); + + // The second and third condition can be potentially relaxed. Currently + // the conditions help to simplify the code and allow us to reuse existing + // code, developed for TryToUnfoldSelect(CmpInst *, BasicBlock *) + if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse()) + continue; + + BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator()); + if (!PredTerm || !PredTerm->isUnconditional()) + continue; + + UnfoldSelectInstr(Pred, BB, PredSI, CondPHI, I); + return true; + } + return false; +} + /// TryToUnfoldSelect - Look for blocks of the form /// bb1: /// %a = select @@ -2421,7 +2494,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { // Now check if one of the select values would allow us to constant fold the // terminator in BB. We don't do the transform if both sides fold, those // cases will be threaded in any case. - if (DDT->pending()) + if (DTU->hasPendingDomTreeUpdates()) LVI->disableDT(); else LVI->enableDT(); @@ -2434,34 +2507,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { if ((LHSFolds != LazyValueInfo::Unknown || RHSFolds != LazyValueInfo::Unknown) && LHSFolds != RHSFolds) { - // Expand the select. - // - // Pred -- - // | v - // | NewBB - // | | - // |----- - // v - // BB - BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold", - BB->getParent(), BB); - // Move the unconditional branch to NewBB. - PredTerm->removeFromParent(); - NewBB->getInstList().insert(NewBB->end(), PredTerm); - // Create a conditional branch and update PHI nodes. - BranchInst::Create(NewBB, BB, SI->getCondition(), Pred); - CondLHS->setIncomingValue(I, SI->getFalseValue()); - CondLHS->addIncoming(SI->getTrueValue(), NewBB); - // The select is now dead. - SI->eraseFromParent(); - - DDT->applyUpdates({{DominatorTree::Insert, NewBB, BB}, - {DominatorTree::Insert, Pred, NewBB}}); - // Update any other PHI nodes in BB. - for (BasicBlock::iterator BI = BB->begin(); - PHINode *Phi = dyn_cast<PHINode>(BI); ++BI) - if (Phi != CondLHS) - Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB); + UnfoldSelectInstr(Pred, BB, SI, CondLHS, I); return true; } } @@ -2533,7 +2579,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { if (!SI) continue; // Expand the select. - TerminatorInst *Term = + Instruction *Term = SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); BasicBlock *SplitBB = SI->getParent(); BasicBlock *NewBB = Term->getParent(); @@ -2548,12 +2594,12 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { Updates.push_back({DominatorTree::Insert, BB, SplitBB}); Updates.push_back({DominatorTree::Insert, BB, NewBB}); Updates.push_back({DominatorTree::Insert, NewBB, SplitBB}); - // BB's successors were moved to SplitBB, update DDT accordingly. + // BB's successors were moved to SplitBB, update DTU accordingly. for (auto *Succ : successors(SplitBB)) { Updates.push_back({DominatorTree::Delete, BB, Succ}); Updates.push_back({DominatorTree::Insert, SplitBB, Succ}); } - DDT->applyUpdates(Updates); + DTU->applyUpdates(Updates); return true; } return false; @@ -2603,9 +2649,8 @@ bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) { if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator())) for (auto &I : *BB) - if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>())) - if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI)) - return true; + if (isGuard(&I) && ThreadGuard(BB, cast<IntrinsicInst>(&I), BI)) + return true; return false; } @@ -2651,28 +2696,16 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, // Duplicate all instructions before the guard and the guard itself to the // branch where implication is not proved. BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween( - BB, PredGuardedBlock, AfterGuard, GuardedMapping); + BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU); assert(GuardedBlock && "Could not create the guarded block?"); // Duplicate all instructions before the guard in the unguarded branch. // Since we have successfully duplicated the guarded block and this block // has fewer instructions, we expect it to succeed. BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween( - BB, PredUnguardedBlock, Guard, UnguardedMapping); + BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU); assert(UnguardedBlock && "Could not create the unguarded block?"); LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block " << GuardedBlock->getName() << "\n"); - // DuplicateInstructionsInSplitBetween inserts a new block "BB.split" between - // PredBB and BB. We need to perform two inserts and one delete for each of - // the above calls to update Dominators. - DDT->applyUpdates( - {// Guarded block split. - {DominatorTree::Delete, PredGuardedBlock, BB}, - {DominatorTree::Insert, PredGuardedBlock, GuardedBlock}, - {DominatorTree::Insert, GuardedBlock, BB}, - // Unguarded block split. - {DominatorTree::Delete, PredUnguardedBlock, BB}, - {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock}, - {DominatorTree::Insert, UnguardedBlock, BB}}); // Some instructions before the guard may still have uses. For them, we need // to create Phi nodes merging their copies in both guarded and unguarded // branches. Those instructions that have no uses can be just removed. |