summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/JumpThreading.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/JumpThreading.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp451
1 files changed, 329 insertions, 122 deletions
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 98c2fcb3dae0f..9d0500419a7f5 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -170,7 +171,7 @@ FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
}
JumpThreadingPass::JumpThreadingPass(int T) {
- BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
+ DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}
// Update branch probability information according to conditional
@@ -213,11 +214,16 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
if (!CondBr)
return;
- BranchProbability BP;
uint64_t TrueWeight, FalseWeight;
if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
return;
+ if (TrueWeight + FalseWeight == 0)
+ // Zero branch_weights do not give a hint for getting branch probabilities.
+ // Technically it would result in division by zero denominator, which is
+ // TrueWeight + FalseWeight.
+ return;
+
// Returns the outgoing edge of the dominating predecessor block
// that leads to the PhiNode's incoming block:
auto GetPredOutEdge =
@@ -252,10 +258,11 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
if (!CI || !CI->getType()->isIntegerTy(1))
continue;
- BP = (CI->isOne() ? BranchProbability::getBranchProbability(
- TrueWeight, TrueWeight + FalseWeight)
- : BranchProbability::getBranchProbability(
- FalseWeight, TrueWeight + FalseWeight));
+ BranchProbability BP =
+ (CI->isOne() ? BranchProbability::getBranchProbability(
+ TrueWeight, TrueWeight + FalseWeight)
+ : BranchProbability::getBranchProbability(
+ FalseWeight, TrueWeight + FalseWeight));
auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
if (!PredOutEdge.first)
@@ -298,8 +305,6 @@ bool JumpThreading::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- // Get DT analysis before LVI. When LVI is initialized it conditionally adds
- // DT if it's available.
auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -316,7 +321,7 @@ bool JumpThreading::runOnFunction(Function &F) {
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
- LVI->printLVI(F, *DT, dbgs());
+ LVI->printLVI(F, DTU.getDomTree(), dbgs());
}
return Changed;
}
@@ -324,8 +329,6 @@ bool JumpThreading::runOnFunction(Function &F) {
PreservedAnalyses JumpThreadingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- // Get DT analysis before LVI. When LVI is initialized it conditionally adds
- // DT if it's available.
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
@@ -374,6 +377,15 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
BFI = std::move(BFI_);
}
+ // Reduce the number of instructions duplicated when optimizing strictly for
+ // size.
+ if (BBDuplicateThreshold.getNumOccurrences())
+ BBDupThreshold = BBDuplicateThreshold;
+ else if (F.hasFnAttribute(Attribute::MinSize))
+ BBDupThreshold = 3;
+ else
+ BBDupThreshold = DefaultBBDupThreshold;
+
// JumpThreading must not processes blocks unreachable from entry. It's a
// waste of compute time and can potentially lead to hangs.
SmallPtrSet<BasicBlock *, 16> Unreachable;
@@ -396,6 +408,12 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
continue;
while (ProcessBlock(&BB)) // Thread all of the branches we can over BB.
Changed = true;
+
+ // Jump threading may have introduced redundant debug values into BB
+ // which should be removed.
+ if (Changed)
+ RemoveRedundantDbgInstrs(&BB);
+
// Stop processing BB if it's the entry or is now deleted. The following
// routines attempt to eliminate BB and locating a suitable replacement
// for the entry is non-trivial.
@@ -418,26 +436,27 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// ProcessBlock doesn't thread BBs with unconditional TIs. However, if BB
// is "almost empty", we attempt to merge BB with its sole successor.
auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
- if (BI && BI->isUnconditional() &&
- // The terminator must be the only non-phi instruction in BB.
- BB.getFirstNonPHIOrDbg()->isTerminator() &&
- // Don't alter Loop headers and latches to ensure another pass can
- // detect and transform nested loops later.
- !LoopHeaders.count(&BB) && !LoopHeaders.count(BI->getSuccessor(0)) &&
- TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
- // BB is valid for cleanup here because we passed in DTU. F remains
- // BB's parent until a DTU->getDomTree() event.
- LVI->eraseBlock(&BB);
- Changed = true;
+ if (BI && BI->isUnconditional()) {
+ BasicBlock *Succ = BI->getSuccessor(0);
+ if (
+ // The terminator must be the only non-phi instruction in BB.
+ BB.getFirstNonPHIOrDbg()->isTerminator() &&
+ // Don't alter Loop headers and latches to ensure another pass can
+ // detect and transform nested loops later.
+ !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
+ TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
+ RemoveRedundantDbgInstrs(Succ);
+ // BB is valid for cleanup here because we passed in DTU. F remains
+ // BB's parent until a DTU->getDomTree() event.
+ LVI->eraseBlock(&BB);
+ Changed = true;
+ }
}
}
EverChanged |= Changed;
} while (Changed);
LoopHeaders.clear();
- // Flush only the Dominator Tree.
- DTU->getDomTree();
- LVI->enableDT();
return EverChanged;
}
@@ -592,20 +611,19 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
/// This returns true if there were any known values.
bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
Value *V, BasicBlock *BB, PredValueInfo &Result,
- ConstantPreference Preference,
- DenseSet<std::pair<Value *, BasicBlock *>> &RecursionSet,
+ ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
Instruction *CxtI) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
// and terminate the search if we loop back to them
- if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ if (!RecursionSet.insert(V).second)
return false;
// If V is a constant, then it is known in all predecessors.
if (Constant *KC = getKnownConstant(V, Preference)) {
for (BasicBlock *Pred : predecessors(BB))
- Result.push_back(std::make_pair(KC, Pred));
+ Result.emplace_back(KC, Pred);
return !Result.empty();
}
@@ -627,17 +645,12 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
// able to handle value inequalities better, for example if the compare is
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
// Perhaps getConstantOnEdge should be smart enough to do this?
-
- if (DTU->hasPendingDomTreeUpdates())
- LVI->disableDT();
- else
- LVI->enableDT();
for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
if (Constant *KC = getKnownConstant(PredCst, Preference))
- Result.push_back(std::make_pair(KC, P));
+ Result.emplace_back(KC, P);
}
return !Result.empty();
@@ -645,20 +658,16 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
/// If I is a PHI node, then we know the incoming values for any constants.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
- if (DTU->hasPendingDomTreeUpdates())
- LVI->disableDT();
- else
- LVI->enableDT();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
if (Constant *KC = getKnownConstant(InVal, Preference)) {
- Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ Result.emplace_back(KC, PN->getIncomingBlock(i));
} else {
Constant *CI = LVI->getConstantOnEdge(InVal,
PN->getIncomingBlock(i),
BB, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference))
- Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ Result.emplace_back(KC, PN->getIncomingBlock(i));
}
}
@@ -757,7 +766,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVal.second));
+ Result.emplace_back(KC, LHSVal.second);
}
}
@@ -779,10 +788,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
- if (DTU->hasPendingDomTreeUpdates())
- LVI->disableDT();
- else
- LVI->enableDT();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i);
Value *LHS, *RHS;
@@ -813,7 +818,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
}
if (Constant *KC = getKnownConstant(Res, WantInteger))
- Result.push_back(std::make_pair(KC, PredBB));
+ Result.emplace_back(KC, PredBB);
}
return !Result.empty();
@@ -826,10 +831,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
if (!isa<Instruction>(CmpLHS) ||
cast<Instruction>(CmpLHS)->getParent() != BB) {
- if (DTU->hasPendingDomTreeUpdates())
- LVI->disableDT();
- else
- LVI->enableDT();
for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
@@ -840,7 +841,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
continue;
Constant *ResC = ConstantInt::get(CmpType, Res);
- Result.push_back(std::make_pair(ResC, P));
+ Result.emplace_back(ResC, P);
}
return !Result.empty();
@@ -858,10 +859,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
if (!isa<Instruction>(AddLHS) ||
cast<Instruction>(AddLHS)->getParent() != BB) {
- if (DTU->hasPendingDomTreeUpdates())
- LVI->disableDT();
- else
- LVI->enableDT();
for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a ConstantRange in
// a predecessor, use that information to try to thread this
@@ -883,7 +880,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
else
continue;
- Result.push_back(std::make_pair(ResC, P));
+ Result.emplace_back(ResC, P);
}
return !Result.empty();
@@ -901,7 +898,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
Constant *V = LHSVal.first;
Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVal.second));
+ Result.emplace_back(KC, LHSVal.second);
}
return !Result.empty();
@@ -935,7 +932,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
// See if the select has a known constant value for this predecessor.
if (Constant *Val = KnownCond ? TrueVal : FalseVal)
- Result.push_back(std::make_pair(Val, C.second));
+ Result.emplace_back(Val, C.second);
}
return !Result.empty();
@@ -943,14 +940,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
}
// If all else fails, see if LVI can figure out a constant value for us.
- if (DTU->hasPendingDomTreeUpdates())
- LVI->disableDT();
- else
- LVI->enableDT();
Constant *CI = LVI->getConstant(V, BB, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference)) {
for (BasicBlock *Pred : predecessors(BB))
- Result.push_back(std::make_pair(KC, Pred));
+ Result.emplace_back(KC, Pred);
}
return !Result.empty();
@@ -1106,10 +1099,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// threading is concerned.
assert(CondBr->isConditional() && "Threading on unconditional terminator");
- if (DTU->hasPendingDomTreeUpdates())
- LVI->disableDT();
- else
- LVI->enableDT();
LazyValueInfo::Tristate Ret =
LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
CondConst, CondBr);
@@ -1363,7 +1352,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
- AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
+ AvailablePreds.emplace_back(PredBB, PredAvailable);
}
// If the loaded value isn't available in any predecessor, it isn't partially
@@ -1430,14 +1419,14 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
"Can't handle critical edge here!");
LoadInst *NewVal = new LoadInst(
LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
- LoadI->getName() + ".pr", false, MaybeAlign(LoadI->getAlignment()),
+ LoadI->getName() + ".pr", false, LoadI->getAlign(),
LoadI->getOrdering(), LoadI->getSyncScopeID(),
UnavailablePred->getTerminator());
NewVal->setDebugLoc(LoadI->getDebugLoc());
if (AATags)
NewVal->setAAMetadata(AATags);
- AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
+ AvailablePreds.emplace_back(UnavailablePred, NewVal);
}
// Now we know that each predecessor of this block has a value in
@@ -1496,56 +1485,70 @@ FindMostPopularDest(BasicBlock *BB,
// explicitly choose to ignore 'undef' destinations. We prefer to thread
// blocks with known and real destinations to threading undef. We'll handle
// them later if interesting.
- DenseMap<BasicBlock*, unsigned> DestPopularity;
+ MapVector<BasicBlock *, unsigned> DestPopularity;
+
+ // Populate DestPopularity with the successors in the order they appear in the
+ // successor list. This way, we ensure determinism by iterating it in the
+ // same order in std::max_element below. We map nullptr to 0 so that we can
+ // return nullptr when PredToDestList contains nullptr only.
+ DestPopularity[nullptr] = 0;
+ for (auto *SuccBB : successors(BB))
+ DestPopularity[SuccBB] = 0;
+
for (const auto &PredToDest : PredToDestList)
if (PredToDest.second)
DestPopularity[PredToDest.second]++;
- if (DestPopularity.empty())
- return nullptr;
-
// Find the most popular dest.
- DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
- BasicBlock *MostPopularDest = DPI->first;
- unsigned Popularity = DPI->second;
- SmallVector<BasicBlock*, 4> SamePopularity;
-
- for (++DPI; DPI != DestPopularity.end(); ++DPI) {
- // If the popularity of this entry isn't higher than the popularity we've
- // seen so far, ignore it.
- if (DPI->second < Popularity)
- ; // ignore.
- else if (DPI->second == Popularity) {
- // If it is the same as what we've seen so far, keep track of it.
- SamePopularity.push_back(DPI->first);
- } else {
- // If it is more popular, remember it.
- SamePopularity.clear();
- MostPopularDest = DPI->first;
- Popularity = DPI->second;
- }
+ using VT = decltype(DestPopularity)::value_type;
+ auto MostPopular = std::max_element(
+ DestPopularity.begin(), DestPopularity.end(),
+ [](const VT &L, const VT &R) { return L.second < R.second; });
+
+ // Okay, we have finally picked the most popular destination.
+ return MostPopular->first;
+}
+
+// Try to evaluate the value of V when the control flows from PredPredBB to
+// BB->getSinglePredecessor() and then on to BB.
+Constant *JumpThreadingPass::EvaluateOnPredecessorEdge(BasicBlock *BB,
+ BasicBlock *PredPredBB,
+ Value *V) {
+ BasicBlock *PredBB = BB->getSinglePredecessor();
+ assert(PredBB && "Expected a single predecessor");
+
+ if (Constant *Cst = dyn_cast<Constant>(V)) {
+ return Cst;
}
- // Okay, now we know the most popular destination. If there is more than one
- // destination, we need to determine one. This is arbitrary, but we need
- // to make a deterministic decision. Pick the first one that appears in the
- // successor list.
- if (!SamePopularity.empty()) {
- SamePopularity.push_back(MostPopularDest);
- Instruction *TI = BB->getTerminator();
- for (unsigned i = 0; ; ++i) {
- assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
+ // Consult LVI if V is not an instruction in BB or PredBB.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
+ return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
+ }
- if (!is_contained(SamePopularity, TI->getSuccessor(i)))
- continue;
+ // Look into a PHI argument.
+ if (PHINode *PHI = dyn_cast<PHINode>(V)) {
+ if (PHI->getParent() == PredBB)
+ return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
+ return nullptr;
+ }
- MostPopularDest = TI->getSuccessor(i);
- break;
+ // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
+ if (CondCmp->getParent() == BB) {
+ Constant *Op0 =
+ EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
+ Constant *Op1 =
+ EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
+ if (Op0 && Op1) {
+ return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
+ }
}
+ return nullptr;
}
- // Okay, we have finally picked the most popular destination.
- return MostPopularDest;
+ return nullptr;
}
bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
@@ -1557,8 +1560,12 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
return false;
PredValueInfoTy PredValues;
- if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
- return false;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
+ CxtI)) {
+ // We don't have known values in predecessors. See if we can thread through
+ // BB and its sole predecessor.
+ return MaybeThreadThroughTwoBasicBlocks(BB, Cond);
+ }
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
@@ -1624,7 +1631,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
isa<CallBrInst>(Pred->getTerminator()))
continue;
- PredToDestList.push_back(std::make_pair(Pred, DestBB));
+ PredToDestList.emplace_back(Pred, DestBB);
}
// If all edges were unthreadable, we fail.
@@ -2015,6 +2022,205 @@ JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
return ValueMapping;
}
+/// Attempt to thread through two successive basic blocks.
+bool JumpThreadingPass::MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB,
+ Value *Cond) {
+ // Consider:
+ //
+ // PredBB:
+ // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
+ // %tobool = icmp eq i32 %cond, 0
+ // br i1 %tobool, label %BB, label ...
+ //
+ // BB:
+ // %cmp = icmp eq i32* %var, null
+ // br i1 %cmp, label ..., label ...
+ //
+ // We don't know the value of %var at BB even if we know which incoming edge
+ // we take to BB. However, once we duplicate PredBB for each of its incoming
+ // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
+ // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
+
+ // Require that BB end with a Branch for simplicity.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!CondBr)
+ return false;
+
+ // BB must have exactly one predecessor.
+ BasicBlock *PredBB = BB->getSinglePredecessor();
+ if (!PredBB)
+ return false;
+
+ // Require that PredBB end with a conditional Branch. If PredBB ends with an
+ // unconditional branch, we should be merging PredBB and BB instead. For
+ // simplicity, we don't deal with a switch.
+ BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
+ if (!PredBBBranch || PredBBBranch->isUnconditional())
+ return false;
+
+ // If PredBB has exactly one incoming edge, we don't gain anything by copying
+ // PredBB.
+ if (PredBB->getSinglePredecessor())
+ return false;
+
+ // Don't thread through PredBB if it contains a successor edge to itself, in
+ // which case we would infinite loop. Suppose we are threading an edge from
+ // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
+ // successor edge to itself. If we allowed jump threading in this case, we
+ // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
+ // PredBB.thread has a successor edge to PredBB, we would immediately come up
+ // with another jump threading opportunity from PredBB.thread through PredBB
+ // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
+ // would keep peeling one iteration from PredBB.
+ if (llvm::is_contained(successors(PredBB), PredBB))
+ return false;
+
+ // Don't thread across a loop header.
+ if (LoopHeaders.count(PredBB))
+ return false;
+
+ // Avoid complication with duplicating EH pads.
+ if (PredBB->isEHPad())
+ return false;
+
+ // Find a predecessor that we can thread. For simplicity, we only consider a
+ // successor edge out of BB to which we thread exactly one incoming edge into
+ // PredBB.
+ unsigned ZeroCount = 0;
+ unsigned OneCount = 0;
+ BasicBlock *ZeroPred = nullptr;
+ BasicBlock *OnePred = nullptr;
+ for (BasicBlock *P : predecessors(PredBB)) {
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
+ EvaluateOnPredecessorEdge(BB, P, Cond))) {
+ if (CI->isZero()) {
+ ZeroCount++;
+ ZeroPred = P;
+ } else if (CI->isOne()) {
+ OneCount++;
+ OnePred = P;
+ }
+ }
+ }
+
+ // Disregard complicated cases where we have to thread multiple edges.
+ BasicBlock *PredPredBB;
+ if (ZeroCount == 1) {
+ PredPredBB = ZeroPred;
+ } else if (OneCount == 1) {
+ PredPredBB = OnePred;
+ } else {
+ return false;
+ }
+
+ BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
+
+ // If threading to the same block as we come from, we would infinite loop.
+ if (SuccBB == BB) {
+ LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
+ << "' - would thread to self!\n");
+ return false;
+ }
+
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
+ LLVM_DEBUG({
+ bool BBIsHeader = LoopHeaders.count(BB);
+ bool SuccIsHeader = LoopHeaders.count(SuccBB);
+ dbgs() << " Not threading across "
+ << (BBIsHeader ? "loop header BB '" : "block BB '")
+ << BB->getName() << "' to dest "
+ << (SuccIsHeader ? "loop header BB '" : "block BB '")
+ << SuccBB->getName()
+ << "' - it might create an irreducible loop!\n";
+ });
+ return false;
+ }
+
+ // Compute the cost of duplicating BB and PredBB.
+ unsigned BBCost =
+ getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned PredBBCost = getJumpThreadDuplicationCost(
+ PredBB, PredBB->getTerminator(), BBDupThreshold);
+
+ // Give up if costs are too high. We need to check BBCost and PredBBCost
+ // individually before checking their sum because getJumpThreadDuplicationCost
+ // return (unsigned)~0 for those basic blocks that cannot be duplicated.
+ if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
+ BBCost + PredBBCost > BBDupThreshold) {
+ LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
+ << "' - Cost is too high: " << PredBBCost
+ << " for PredBB, " << BBCost << "for BB\n");
+ return false;
+ }
+
+ // Now we are ready to duplicate PredBB.
+ ThreadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
+ return true;
+}
+
+void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
+ BasicBlock *PredBB,
+ BasicBlock *BB,
+ BasicBlock *SuccBB) {
+ LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
+ << BB->getName() << "'\n");
+
+ BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
+ BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
+
+ BasicBlock *NewBB =
+ BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
+ PredBB->getParent(), PredBB);
+ NewBB->moveAfter(PredBB);
+
+ // Set the block frequency of NewBB.
+ if (HasProfileData) {
+ auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
+ BPI->getEdgeProbability(PredPredBB, PredBB);
+ BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ }
+
+ // We are going to have to map operands from the original BB block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
+ // to account for entry from PredPredBB.
+ DenseMap<Instruction *, Value *> ValueMapping =
+ CloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
+
+ // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
+ // This eliminates predecessors from PredPredBB, which requires us to simplify
+ // any PHI nodes in PredBB.
+ Instruction *PredPredTerm = PredPredBB->getTerminator();
+ for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
+ if (PredPredTerm->getSuccessor(i) == PredBB) {
+ PredBB->removePredecessor(PredPredBB, true);
+ PredPredTerm->setSuccessor(i, NewBB);
+ }
+
+ AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
+ ValueMapping);
+ AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
+ ValueMapping);
+
+ DTU->applyUpdatesPermissive(
+ {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
+ {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
+ {DominatorTree::Insert, PredPredBB, NewBB},
+ {DominatorTree::Delete, PredPredBB, PredBB}});
+
+ UpdateSSA(PredBB, NewBB, ValueMapping);
+
+ // Clean up things like PHI nodes with single operands, dead instructions,
+ // etc.
+ SimplifyInstructionsInBlock(NewBB, TLI);
+ SimplifyInstructionsInBlock(PredBB, TLI);
+
+ SmallVector<BasicBlock *, 1> PredsToFactor;
+ PredsToFactor.push_back(NewBB);
+ ThreadEdge(BB, PredsToFactor, SuccBB);
+}
+
/// TryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool JumpThreadingPass::TryThreadEdge(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
@@ -2078,10 +2284,6 @@ void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
<< "' to '" << SuccBB->getName()
<< ", across block:\n " << *BB << "\n");
- if (DTU->hasPendingDomTreeUpdates())
- LVI->disableDT();
- else
- LVI->enableDT();
LVI->threadEdge(PredBB, BB, SuccBB);
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
@@ -2246,8 +2448,7 @@ void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
}
// Update edge probabilities in BPI.
- for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
- BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
+ BPI->setEdgeProbability(BB, BBSuccProbs);
// Update the profile metadata as well.
//
@@ -2524,10 +2725,6 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
// Now check if one of the select values would allow us to constant fold the
// terminator in BB. We don't do the transform if both sides fold, those
// cases will be threaded in any case.
- if (DTU->hasPendingDomTreeUpdates())
- LVI->disableDT();
- else
- LVI->enableDT();
LazyValueInfo::Tristate LHSFolds =
LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
CondRHS, Pred, BB, CondCmp);
@@ -2565,6 +2762,16 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
/// select is not jump-threaded, it will be folded again in the later
/// optimizations.
bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
+ // This transform can introduce a UB (a conditional branch that depends on a
+ // poison value) that was not present in the original program. See
+ // @TryToUnfoldSelectInCurrBB test in test/Transforms/JumpThreading/select.ll.
+ // Disable this transform under MemorySanitizer.
+ // FIXME: either delete it or replace with a valid transform. This issue is
+ // not limited to MemorySanitizer (but has only been observed as an MSan false
+ // positive in practice so far).
+ if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
+ return false;
+
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB))