summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/JumpThreading.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-01-19 10:01:25 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-01-19 10:01:25 +0000
commitd8e91e46262bc44006913e6796843909f1ac7bcd (patch)
tree7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Transforms/Scalar/JumpThreading.cpp
parentb7eb8e35e481a74962664b63dfb09483b200209a (diff)
Notes
Diffstat (limited to 'lib/Transforms/Scalar/JumpThreading.cpp')
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp255
1 files changed, 144 insertions, 111 deletions
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 1d66472f93c8..48de56a02834 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -25,12 +25,12 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -65,6 +66,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
@@ -285,7 +287,7 @@ bool JumpThreading::runOnFunction(Function &F) {
auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DeferredDominance DDT(*DT);
+ DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = F.hasProfileData();
@@ -295,7 +297,7 @@ bool JumpThreading::runOnFunction(Function &F) {
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DDT, HasProfileData,
+ bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, HasProfileData,
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -312,7 +314,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- DeferredDominance DDT(DT);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
@@ -322,7 +324,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = runImpl(F, &TLI, &LVI, &AA, &DDT, HasProfileData,
+ bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, HasProfileData,
std::move(BFI), std::move(BPI));
if (!Changed)
@@ -336,14 +338,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
LazyValueInfo *LVI_, AliasAnalysis *AA_,
- DeferredDominance *DDT_, bool HasProfileData_,
+ DomTreeUpdater *DTU_, bool HasProfileData_,
std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_) {
LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = TLI_;
LVI = LVI_;
AA = AA_;
- DDT = DDT_;
+ DTU = DTU_;
BFI.reset();
BPI.reset();
// When profile data is available, we need to update edge weights after
@@ -360,7 +362,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// JumpThreading must not processes blocks unreachable from entry. It's a
// waste of compute time and can potentially lead to hangs.
SmallPtrSet<BasicBlock *, 16> Unreachable;
- DominatorTree &DT = DDT->flush();
+ assert(DTU && "DTU isn't passed into JumpThreading before using it.");
+ assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
+ DominatorTree &DT = DTU->getDomTree();
for (auto &BB : F)
if (!DT.isReachableFromEntry(&BB))
Unreachable.insert(&BB);
@@ -379,7 +383,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Stop processing BB if it's the entry or is now deleted. The following
// routines attempt to eliminate BB and locating a suitable replacement
// for the entry is non-trivial.
- if (&BB == &F.getEntryBlock() || DDT->pendingDeletedBB(&BB))
+ if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
continue;
if (pred_empty(&BB)) {
@@ -390,7 +394,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
<< '\n');
LoopHeaders.erase(&BB);
LVI->eraseBlock(&BB);
- DeleteDeadBlock(&BB, DDT);
+ DeleteDeadBlock(&BB, DTU);
Changed = true;
continue;
}
@@ -404,9 +408,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Don't alter Loop headers and latches to ensure another pass can
// detect and transform nested loops later.
!LoopHeaders.count(&BB) && !LoopHeaders.count(BI->getSuccessor(0)) &&
- TryToSimplifyUncondBranchFromEmptyBlock(&BB, DDT)) {
- // BB is valid for cleanup here because we passed in DDT. F remains
- // BB's parent until a DDT->flush() event.
+ TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
+ // BB is valid for cleanup here because we passed in DTU. F remains
+ // BB's parent until a DTU->getDomTree() event.
LVI->eraseBlock(&BB);
Changed = true;
}
@@ -415,7 +419,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
} while (Changed);
LoopHeaders.clear();
- DDT->flush();
+ // Flush only the Dominator Tree.
+ DTU->getDomTree();
LVI->enableDT();
return EverChanged;
}
@@ -569,9 +574,11 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
/// BB in the result vector.
///
/// This returns true if there were any known values.
-bool JumpThreadingPass::ComputeValueKnownInPredecessors(
+bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
Value *V, BasicBlock *BB, PredValueInfo &Result,
- ConstantPreference Preference, Instruction *CxtI) {
+ ConstantPreference Preference,
+ DenseSet<std::pair<Value *, BasicBlock *>> &RecursionSet,
+ Instruction *CxtI) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
@@ -579,10 +586,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
if (!RecursionSet.insert(std::make_pair(V, BB)).second)
return false;
- // An RAII help to remove this pair from the recursion set once the recursion
- // stack pops back out again.
- RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
-
// If V is a constant, then it is known in all predecessors.
if (Constant *KC = getKnownConstant(V, Preference)) {
for (BasicBlock *Pred : predecessors(BB))
@@ -609,7 +612,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
// Perhaps getConstantOnEdge should be smart enough to do this?
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -626,7 +629,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
/// If I is a PHI node, then we know the incoming values for any constants.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -652,7 +655,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
Value *Source = CI->getOperand(0);
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
return false;
- ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
+ ComputeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
+ RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -672,10 +676,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
I->getOpcode() == Instruction::And) {
PredValueInfoTy LHSVals, RHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
- WantInteger, CxtI);
- ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ WantInteger, RecursionSet, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals,
+ WantInteger, RecursionSet, CxtI);
if (LHSVals.empty() && RHSVals.empty())
return false;
@@ -710,8 +714,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
if (I->getOpcode() == Instruction::Xor &&
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
+ WantInteger, RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -728,8 +732,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
&& "A binary operator creating a block address?");
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
+ WantInteger, RecursionSet, CxtI);
// Try to use constant folding to simplify the binary operator.
for (const auto &LHSVal : LHSVals) {
@@ -759,7 +763,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -806,7 +810,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
if (!isa<Instruction>(CmpLHS) ||
cast<Instruction>(CmpLHS)->getParent() != BB) {
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -838,7 +842,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
if (!isa<Instruction>(AddLHS) ||
cast<Instruction>(AddLHS)->getParent() != BB) {
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -874,8 +878,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ WantInteger, RecursionSet, CxtI);
for (const auto &LHSVal : LHSVals) {
Constant *V = LHSVal.first;
@@ -895,8 +899,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
PredValueInfoTy Conds;
if ((TrueVal || FalseVal) &&
- ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
- WantInteger, CxtI)) {
+ ComputeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
+ WantInteger, RecursionSet, CxtI)) {
for (auto &C : Conds) {
Constant *Cond = C.first;
@@ -923,7 +927,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
}
// If all else fails, see if LVI can figure out a constant value for us.
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -942,7 +946,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
/// Since we can pick an arbitrary destination, we pick the successor with the
/// fewest predecessors. This should reduce the in-degree of the others.
static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
- TerminatorInst *BBTerm = BB->getTerminator();
+ Instruction *BBTerm = BB->getTerminator();
unsigned MinSucc = 0;
BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
// Compute the successor with the minimum number of predecessors.
@@ -974,7 +978,7 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) {
bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// If the block is trivially dead, just return and let the caller nuke it.
// This simplifies other transformations.
- if (DDT->pendingDeletedBB(BB) ||
+ if (DTU->isBBPendingDeletion(BB) ||
(pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
return false;
@@ -983,15 +987,15 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
- const TerminatorInst *TI = SinglePred->getTerminator();
- if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
+ const Instruction *TI = SinglePred->getTerminator();
+ if (!TI->isExceptionalTerminator() && TI->getNumSuccessors() == 1 &&
SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
LVI->eraseBlock(SinglePred);
- MergeBasicBlockIntoOnlyPred(BB, nullptr, DDT);
+ MergeBasicBlockIntoOnlyPred(BB, DTU);
// Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
// BB code within one basic block `BB`), we need to invalidate the LVI
@@ -1075,7 +1079,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
std::vector<DominatorTree::UpdateType> Updates;
// Fold the branch/switch.
- TerminatorInst *BBTerm = BB->getTerminator();
+ Instruction *BBTerm = BB->getTerminator();
Updates.reserve(BBTerm->getNumSuccessors());
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
@@ -1088,7 +1092,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
return true;
}
@@ -1100,7 +1104,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
<< "' folding terminator: " << *BB->getTerminator()
<< '\n');
++NumFolds;
- ConstantFoldTerminator(BB, true, nullptr, DDT);
+ ConstantFoldTerminator(BB, true, nullptr, DTU);
return true;
}
@@ -1127,7 +1131,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// threading is concerned.
assert(CondBr->isConditional() && "Threading on unconditional terminator");
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -1156,7 +1160,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
ConstantInt::getFalse(CondCmp->getType());
ReplaceFoldableUses(CondCmp, CI);
}
- DDT->deleteEdge(BB, ToRemoveSucc);
+ DTU->deleteEdgeRelaxed(BB, ToRemoveSucc);
return true;
}
@@ -1167,6 +1171,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
}
}
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ TryToUnfoldSelect(SI, BB);
+
// Check for some cases that are worth simplifying. Right now we want to look
// for loads that are used by a switch or by the condition for the branch. If
// we see one, check to see if it's partially redundant. If so, insert a PHI
@@ -1240,7 +1247,7 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
RemoveSucc->removePredecessor(BB);
BranchInst::Create(KeepSucc, BI);
BI->eraseFromParent();
- DDT->deleteEdge(BB, RemoveSucc);
+ DTU->deleteEdgeRelaxed(BB, RemoveSucc);
return true;
}
CurrentBB = CurrentPred;
@@ -1296,7 +1303,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
if (IsLoadCSE) {
LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
- combineMetadataForCSE(NLoadI, LoadI);
+ combineMetadataForCSE(NLoadI, LoadI, false);
};
// If the returned value is the load itself, replace with an undef. This can
@@ -1486,7 +1493,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
}
for (LoadInst *PredLoadI : CSELoads) {
- combineMetadataForCSE(PredLoadI, LoadI);
+ combineMetadataForCSE(PredLoadI, LoadI, true);
}
LoadI->replaceAllUsesWith(PN);
@@ -1544,7 +1551,7 @@ FindMostPopularDest(BasicBlock *BB,
// successor list.
if (!SamePopularity.empty()) {
SamePopularity.push_back(MostPopularDest);
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
for (unsigned i = 0; ; ++i) {
assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
@@ -1664,10 +1671,10 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
}
// Finally update the terminator.
- TerminatorInst *Term = BB->getTerminator();
+ Instruction *Term = BB->getTerminator();
BranchInst::Create(OnlyDest, Term);
Term->eraseFromParent();
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
// If the condition is now dead due to the removal of the old terminator,
// erase it.
@@ -1945,7 +1952,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
<< "' with cost: " << JumpThreadCost
<< ", across block:\n " << *BB << "\n");
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -1974,7 +1981,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
- for (; !isa<TerminatorInst>(BI); ++BI) {
+ for (; !BI->isTerminator(); ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
@@ -2001,7 +2008,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Update the terminator of PredBB to jump to NewBB instead of BB. This
// eliminates predecessors from BB, which requires us to simplify any PHI
// nodes in BB.
- TerminatorInst *PredTerm = PredBB->getTerminator();
+ Instruction *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
BB->removePredecessor(PredBB, true);
@@ -2009,7 +2016,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
}
// Enqueue required DT updates.
- DDT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
+ DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
{DominatorTree::Insert, PredBB, NewBB},
{DominatorTree::Delete, PredBB, BB}});
@@ -2105,12 +2112,12 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
return NewBBs[0];
}
bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
assert(TI->getNumSuccessors() > 1 && "not a split");
MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
@@ -2378,12 +2385,78 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
++NumDupes;
return true;
}
+// Pred is a predecessor of BB with an unconditional branch to BB. SI is
+// a Select instruction in Pred. BB has other predecessors and SI is used in
+// a PHI node in BB. SI has no other use.
+// A new basic block, NewBB, is created and SI is converted to compare and
+// conditional branch. SI is erased from parent.
+void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
+ SelectInst *SI, PHINode *SIUse,
+ unsigned Idx) {
+ // Expand the select.
+ //
+ // Pred --
+ // | v
+ // | NewBB
+ // | |
+ // |-----
+ // v
+ // BB
+ BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
+ BB->getParent(), BB);
+ // Move the unconditional branch to NewBB.
+ PredTerm->removeFromParent();
+ NewBB->getInstList().insert(NewBB->end(), PredTerm);
+ // Create a conditional branch and update PHI nodes.
+ BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
+ SIUse->setIncomingValue(Idx, SI->getFalseValue());
+ SIUse->addIncoming(SI->getTrueValue(), NewBB);
+
+ // The select is now dead.
+ SI->eraseFromParent();
+ DTU->applyUpdates({{DominatorTree::Insert, NewBB, BB},
+ {DominatorTree::Insert, Pred, NewBB}});
+
+ // Update any other PHI nodes in BB.
+ for (BasicBlock::iterator BI = BB->begin();
+ PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
+ if (Phi != SIUse)
+ Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
+}
+
+bool JumpThreadingPass::TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
+ PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
+
+ if (!CondPHI || CondPHI->getParent() != BB)
+ return false;
+
+ for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *Pred = CondPHI->getIncomingBlock(I);
+ SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
+
+ // The second and third condition can be potentially relaxed. Currently
+ // the conditions help to simplify the code and allow us to reuse existing
+ // code, developed for TryToUnfoldSelect(CmpInst *, BasicBlock *)
+ if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
+ continue;
+
+ BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!PredTerm || !PredTerm->isUnconditional())
+ continue;
+
+ UnfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
+ return true;
+ }
+ return false;
+}
+
/// TryToUnfoldSelect - Look for blocks of the form
/// bb1:
/// %a = select
@@ -2421,7 +2494,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
// Now check if one of the select values would allow us to constant fold the
// terminator in BB. We don't do the transform if both sides fold, those
// cases will be threaded in any case.
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -2434,34 +2507,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
if ((LHSFolds != LazyValueInfo::Unknown ||
RHSFolds != LazyValueInfo::Unknown) &&
LHSFolds != RHSFolds) {
- // Expand the select.
- //
- // Pred --
- // | v
- // | NewBB
- // | |
- // |-----
- // v
- // BB
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
- BB->getParent(), BB);
- // Move the unconditional branch to NewBB.
- PredTerm->removeFromParent();
- NewBB->getInstList().insert(NewBB->end(), PredTerm);
- // Create a conditional branch and update PHI nodes.
- BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
- CondLHS->setIncomingValue(I, SI->getFalseValue());
- CondLHS->addIncoming(SI->getTrueValue(), NewBB);
- // The select is now dead.
- SI->eraseFromParent();
-
- DDT->applyUpdates({{DominatorTree::Insert, NewBB, BB},
- {DominatorTree::Insert, Pred, NewBB}});
- // Update any other PHI nodes in BB.
- for (BasicBlock::iterator BI = BB->begin();
- PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
- if (Phi != CondLHS)
- Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
+ UnfoldSelectInstr(Pred, BB, SI, CondLHS, I);
return true;
}
}
@@ -2533,7 +2579,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
if (!SI)
continue;
// Expand the select.
- TerminatorInst *Term =
+ Instruction *Term =
SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
BasicBlock *SplitBB = SI->getParent();
BasicBlock *NewBB = Term->getParent();
@@ -2548,12 +2594,12 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
Updates.push_back({DominatorTree::Insert, BB, SplitBB});
Updates.push_back({DominatorTree::Insert, BB, NewBB});
Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
- // BB's successors were moved to SplitBB, update DDT accordingly.
+ // BB's successors were moved to SplitBB, update DTU accordingly.
for (auto *Succ : successors(SplitBB)) {
Updates.push_back({DominatorTree::Delete, BB, Succ});
Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
}
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
return true;
}
return false;
@@ -2603,9 +2649,8 @@ bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
for (auto &I : *BB)
- if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
- if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
- return true;
+ if (isGuard(&I) && ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
+ return true;
return false;
}
@@ -2651,28 +2696,16 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
// Duplicate all instructions before the guard and the guard itself to the
// branch where implication is not proved.
BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
- BB, PredGuardedBlock, AfterGuard, GuardedMapping);
+ BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
assert(GuardedBlock && "Could not create the guarded block?");
// Duplicate all instructions before the guard in the unguarded branch.
// Since we have successfully duplicated the guarded block and this block
// has fewer instructions, we expect it to succeed.
BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
- BB, PredUnguardedBlock, Guard, UnguardedMapping);
+ BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
assert(UnguardedBlock && "Could not create the unguarded block?");
LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
<< GuardedBlock->getName() << "\n");
- // DuplicateInstructionsInSplitBetween inserts a new block "BB.split" between
- // PredBB and BB. We need to perform two inserts and one delete for each of
- // the above calls to update Dominators.
- DDT->applyUpdates(
- {// Guarded block split.
- {DominatorTree::Delete, PredGuardedBlock, BB},
- {DominatorTree::Insert, PredGuardedBlock, GuardedBlock},
- {DominatorTree::Insert, GuardedBlock, BB},
- // Unguarded block split.
- {DominatorTree::Delete, PredUnguardedBlock, BB},
- {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock},
- {DominatorTree::Insert, UnguardedBlock, BB}});
// Some instructions before the guard may still have uses. For them, we need
// to create Phi nodes merging their copies in both guarded and unguarded
// branches. Those instructions that have no uses can be just removed.