diff options
Diffstat (limited to 'lib/Transforms/Scalar/JumpThreading.cpp')
-rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 546 |
1 files changed, 349 insertions, 197 deletions
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 1476f7850cf0..1d66472f93c8 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -64,7 +65,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> @@ -131,10 +131,11 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - if (PrintLVIAfterJumpThreading) - AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<LazyValueInfoWrapperPass>(); + AU.addPreserved<LazyValueInfoWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } @@ -148,6 +149,7 @@ char JumpThreading::ID = 0; INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -164,7 +166,7 @@ JumpThreadingPass::JumpThreadingPass(int T) { } // Update branch probability information according to conditional -// branch probablity. This is usually made possible for cloned branches +// branch probability. This is usually made possible for cloned branches // in inline instances by the context specific profile in the caller. // For instance, // @@ -278,8 +280,12 @@ bool JumpThreading::runOnFunction(Function &F) { if (skipFunction(F)) return false; auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + // Get DT analysis before LVI. When LVI is initialized it conditionally adds + // DT if it's available. + auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + DeferredDominance DDT(*DT); std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; bool HasProfileData = F.hasProfileData(); @@ -289,12 +295,11 @@ bool JumpThreading::runOnFunction(Function &F) { BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DDT, HasProfileData, + std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { dbgs() << "LVI for function '" << F.getName() << "':\n"; - LVI->printLVI(F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(), - dbgs()); + LVI->printLVI(F, *DT, dbgs()); } return Changed; } @@ -302,8 +307,12 @@ bool JumpThreading::runOnFunction(Function &F) { PreservedAnalyses JumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + // Get DT analysis before LVI. When LVI is initialized it conditionally adds + // DT if it's available. + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &LVI = AM.getResult<LazyValueAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); + DeferredDominance DDT(DT); std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; @@ -313,25 +322,28 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = runImpl(F, &TLI, &LVI, &AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = runImpl(F, &TLI, &LVI, &AA, &DDT, HasProfileData, + std::move(BFI), std::move(BPI)); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LazyValueAnalysis>(); return PA; } bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, AliasAnalysis *AA_, - bool HasProfileData_, + DeferredDominance *DDT_, bool HasProfileData_, std::unique_ptr<BlockFrequencyInfo> BFI_, std::unique_ptr<BranchProbabilityInfo> BPI_) { - DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); + LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; LVI = LVI_; AA = AA_; + DDT = DDT_; BFI.reset(); BPI.reset(); // When profile data is available, we need to update edge weights after @@ -345,69 +357,66 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, BFI = std::move(BFI_); } - // Remove unreachable blocks from function as they may result in infinite - // loop. We do threading if we found something profitable. Jump threading a - // branch can create other opportunities. If these opportunities form a cycle - // i.e. if any jump threading is undoing previous threading in the path, then - // we will loop forever. We take care of this issue by not jump threading for - // back edges. This works for normal cases but not for unreachable blocks as - // they may have cycle with no back edge. - bool EverChanged = false; - EverChanged |= removeUnreachableBlocks(F, LVI); + // JumpThreading must not processes blocks unreachable from entry. It's a + // waste of compute time and can potentially lead to hangs. + SmallPtrSet<BasicBlock *, 16> Unreachable; + DominatorTree &DT = DDT->flush(); + for (auto &BB : F) + if (!DT.isReachableFromEntry(&BB)) + Unreachable.insert(&BB); FindLoopHeaders(F); + bool EverChanged = false; bool Changed; do { Changed = false; - for (Function::iterator I = F.begin(), E = F.end(); I != E;) { - BasicBlock *BB = &*I; - // Thread all of the branches we can over this block. - while (ProcessBlock(BB)) + for (auto &BB : F) { + if (Unreachable.count(&BB)) + continue; + while (ProcessBlock(&BB)) // Thread all of the branches we can over BB. Changed = true; + // Stop processing BB if it's the entry or is now deleted. The following + // routines attempt to eliminate BB and locating a suitable replacement + // for the entry is non-trivial. + if (&BB == &F.getEntryBlock() || DDT->pendingDeletedBB(&BB)) + continue; - ++I; - - // If the block is trivially dead, zap it. This eliminates the successor - // edges which simplifies the CFG. - if (pred_empty(BB) && - BB != &BB->getParent()->getEntryBlock()) { - DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName() - << "' with terminator: " << *BB->getTerminator() << '\n'); - LoopHeaders.erase(BB); - LVI->eraseBlock(BB); - DeleteDeadBlock(BB); + if (pred_empty(&BB)) { + // When ProcessBlock makes BB unreachable it doesn't bother to fix up + // the instructions in it. We must remove BB to prevent invalid IR. + LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName() + << "' with terminator: " << *BB.getTerminator() + << '\n'); + LoopHeaders.erase(&BB); + LVI->eraseBlock(&BB); + DeleteDeadBlock(&BB, DDT); Changed = true; continue; } - BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); - - // Can't thread an unconditional jump, but if the block is "almost - // empty", we can replace uses of it with uses of the successor and make - // this dead. - // We should not eliminate the loop header or latch either, because - // eliminating a loop header or latch might later prevent LoopSimplify - // from transforming nested loops into simplified form. We will rely on - // later passes in backend to clean up empty blocks. + // ProcessBlock doesn't thread BBs with unconditional TIs. However, if BB + // is "almost empty", we attempt to merge BB with its sole successor. + auto *BI = dyn_cast<BranchInst>(BB.getTerminator()); if (BI && BI->isUnconditional() && - BB != &BB->getParent()->getEntryBlock() && - // If the terminator is the only non-phi instruction, try to nuke it. - BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) && - !LoopHeaders.count(BI->getSuccessor(0))) { - // FIXME: It is always conservatively correct to drop the info - // for a block even if it doesn't get erased. This isn't totally - // awesome, but it allows us to use AssertingVH to prevent nasty - // dangling pointer issues within LazyValueInfo. - LVI->eraseBlock(BB); - if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) - Changed = true; + // The terminator must be the only non-phi instruction in BB. + BB.getFirstNonPHIOrDbg()->isTerminator() && + // Don't alter Loop headers and latches to ensure another pass can + // detect and transform nested loops later. + !LoopHeaders.count(&BB) && !LoopHeaders.count(BI->getSuccessor(0)) && + TryToSimplifyUncondBranchFromEmptyBlock(&BB, DDT)) { + // BB is valid for cleanup here because we passed in DDT. F remains + // BB's parent until a DDT->flush() event. + LVI->eraseBlock(&BB); + Changed = true; } } EverChanged |= Changed; } while (Changed); LoopHeaders.clear(); + DDT->flush(); + LVI->enableDT(); return EverChanged; } @@ -600,6 +609,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. // Perhaps getConstantOnEdge should be smart enough to do this? + if (DDT->pending()) + LVI->disableDT(); + else + LVI->enableDT(); for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. @@ -613,6 +626,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( /// If I is a PHI node, then we know the incoming values for any constants. if (PHINode *PN = dyn_cast<PHINode>(I)) { + if (DDT->pending()) + LVI->disableDT(); + else + LVI->enableDT(); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *InVal = PN->getIncomingValue(i); if (Constant *KC = getKnownConstant(InVal, Preference)) { @@ -630,11 +647,9 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( } // Handle Cast instructions. Only see through Cast when the source operand is - // PHI or Cmp and the source type is i1 to save the compilation time. + // PHI or Cmp to save the compilation time. if (CastInst *CI = dyn_cast<CastInst>(I)) { Value *Source = CI->getOperand(0); - if (!Source->getType()->isIntegerTy(1)) - return false; if (!isa<PHINode>(Source) && !isa<CmpInst>(Source)) return false; ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI); @@ -738,20 +753,36 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( CmpInst::Predicate Pred = Cmp->getPredicate(); PHINode *PN = dyn_cast<PHINode>(CmpLHS); + if (!PN) + PN = dyn_cast<PHINode>(CmpRHS); if (PN && PN->getParent() == BB) { const DataLayout &DL = PN->getModule()->getDataLayout(); // We can do this simplification if any comparisons fold to true or false. // See if any do. + if (DDT->pending()) + LVI->disableDT(); + else + LVI->enableDT(); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PredBB = PN->getIncomingBlock(i); - Value *LHS = PN->getIncomingValue(i); - Value *RHS = CmpRHS->DoPHITranslation(BB, PredBB); - + Value *LHS, *RHS; + if (PN == CmpLHS) { + LHS = PN->getIncomingValue(i); + RHS = CmpRHS->DoPHITranslation(BB, PredBB); + } else { + LHS = CmpLHS->DoPHITranslation(BB, PredBB); + RHS = PN->getIncomingValue(i); + } Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL}); if (!Res) { if (!isa<Constant>(RHS)) continue; + // getPredicateOnEdge call will make no sense if LHS is defined in BB. + auto LHSInst = dyn_cast<Instruction>(LHS); + if (LHSInst && LHSInst->getParent() == BB) + continue; + LazyValueInfo::Tristate ResT = LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB, BB, @@ -775,6 +806,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( if (!isa<Instruction>(CmpLHS) || cast<Instruction>(CmpLHS)->getParent() != BB) { + if (DDT->pending()) + LVI->disableDT(); + else + LVI->enableDT(); for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. @@ -803,6 +838,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) { if (!isa<Instruction>(AddLHS) || cast<Instruction>(AddLHS)->getParent() != BB) { + if (DDT->pending()) + LVI->disableDT(); + else + LVI->enableDT(); for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a ConstantRange in // a predecessor, use that information to try to thread this @@ -884,6 +923,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( } // If all else fails, see if LVI can figure out a constant value for us. + if (DDT->pending()) + LVI->disableDT(); + else + LVI->enableDT(); Constant *CI = LVI->getConstant(V, BB, CxtI); if (Constant *KC = getKnownConstant(CI, Preference)) { for (BasicBlock *Pred : predecessors(BB)) @@ -903,10 +946,10 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { unsigned MinSucc = 0; BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); // Compute the successor with the minimum number of predecessors. - unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + unsigned MinNumPreds = pred_size(TestBB); for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { TestBB = BBTerm->getSuccessor(i); - unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + unsigned NumPreds = pred_size(TestBB); if (NumPreds < MinNumPreds) { MinSucc = i; MinNumPreds = NumPreds; @@ -931,8 +974,8 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) { bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // If the block is trivially dead, just return and let the caller nuke it. // This simplifies other transformations. - if (pred_empty(BB) && - BB != &BB->getParent()->getEntryBlock()) + if (DDT->pendingDeletedBB(BB) || + (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock())) return false; // If this block has a single predecessor, and if that pred has a single @@ -948,7 +991,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { LoopHeaders.insert(BB); LVI->eraseBlock(SinglePred); - MergeBasicBlockIntoOnlyPred(BB); + MergeBasicBlockIntoOnlyPred(BB, nullptr, DDT); // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by // BB code within one basic block `BB`), we need to invalidate the LVI @@ -977,9 +1020,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // Invalidate LVI information for BB if the LVI is not provably true for // all of BB. - if (any_of(*BB, [](Instruction &I) { - return !isGuaranteedToTransferExecutionToSuccessor(&I); - })) + if (!isGuaranteedToTransferExecutionToSuccessor(BB)) LVI->eraseBlock(BB); return true; } @@ -1031,18 +1072,23 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa<UndefValue>(Condition)) { unsigned BestSucc = GetBestDestForJumpOnUndef(BB); + std::vector<DominatorTree::UpdateType> Updates; // Fold the branch/switch. TerminatorInst *BBTerm = BB->getTerminator(); + Updates.reserve(BBTerm->getNumSuccessors()); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { if (i == BestSucc) continue; - BBTerm->getSuccessor(i)->removePredecessor(BB, true); + BasicBlock *Succ = BBTerm->getSuccessor(i); + Succ->removePredecessor(BB, true); + Updates.push_back({DominatorTree::Delete, BB, Succ}); } - DEBUG(dbgs() << " In block '" << BB->getName() - << "' folding undef terminator: " << *BBTerm << '\n'); + LLVM_DEBUG(dbgs() << " In block '" << BB->getName() + << "' folding undef terminator: " << *BBTerm << '\n'); BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); + DDT->applyUpdates(Updates); return true; } @@ -1050,10 +1096,11 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // terminator to an unconditional branch. This can occur due to threading in // other blocks. if (getKnownConstant(Condition, Preference)) { - DEBUG(dbgs() << " In block '" << BB->getName() - << "' folding terminator: " << *BB->getTerminator() << '\n'); + LLVM_DEBUG(dbgs() << " In block '" << BB->getName() + << "' folding terminator: " << *BB->getTerminator() + << '\n'); ++NumFolds; - ConstantFoldTerminator(BB, true); + ConstantFoldTerminator(BB, true, nullptr, DDT); return true; } @@ -1080,13 +1127,18 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // threading is concerned. assert(CondBr->isConditional() && "Threading on unconditional terminator"); + if (DDT->pending()) + LVI->disableDT(); + else + LVI->enableDT(); LazyValueInfo::Tristate Ret = LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0), CondConst, CondBr); if (Ret != LazyValueInfo::Unknown) { unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0; unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1; - CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true); + BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove); + ToRemoveSucc->removePredecessor(BB, true); BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); CondBr->eraseFromParent(); if (CondCmp->use_empty()) @@ -1104,6 +1156,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { ConstantInt::getFalse(CondCmp->getType()); ReplaceFoldableUses(CondCmp, CI); } + DDT->deleteEdge(BB, ToRemoveSucc); return true; } @@ -1125,8 +1178,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // TODO: There are other places where load PRE would be profitable, such as // more complex comparisons. - if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue)) - if (SimplifyPartiallyRedundantLoad(LI)) + if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue)) + if (SimplifyPartiallyRedundantLoad(LoadI)) return true; // Before threading, try to propagate profile data backwards: @@ -1182,9 +1235,12 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) { Optional<bool> Implication = isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue); if (Implication) { - BI->getSuccessor(*Implication ? 1 : 0)->removePredecessor(BB); - BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI); + BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1); + BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0); + RemoveSucc->removePredecessor(BB); + BranchInst::Create(KeepSucc, BI); BI->eraseFromParent(); + DDT->deleteEdge(BB, RemoveSucc); return true; } CurrentBB = CurrentPred; @@ -1202,17 +1258,17 @@ static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) { return false; } -/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant -/// load instruction, eliminate it by replacing it with a PHI node. This is an -/// important optimization that encourages jump threading, and needs to be run -/// interlaced with other jump threading tasks. -bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { +/// SimplifyPartiallyRedundantLoad - If LoadI is an obviously partially +/// redundant load instruction, eliminate it by replacing it with a PHI node. +/// This is an important optimization that encourages jump threading, and needs +/// to be run interlaced with other jump threading tasks. +bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) { // Don't hack volatile and ordered loads. - if (!LI->isUnordered()) return false; + if (!LoadI->isUnordered()) return false; // If the load is defined in a block with exactly one predecessor, it can't be // partially redundant. - BasicBlock *LoadBB = LI->getParent(); + BasicBlock *LoadBB = LoadI->getParent(); if (LoadBB->getSinglePredecessor()) return false; @@ -1222,7 +1278,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (LoadBB->isEHPad()) return false; - Value *LoadedPtr = LI->getOperand(0); + Value *LoadedPtr = LoadI->getOperand(0); // If the loaded operand is defined in the LoadBB and its not a phi, // it can't be available in predecessors. @@ -1231,26 +1287,27 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan a few instructions up from the load, to see if it is obviously live at // the entry to its block. - BasicBlock::iterator BBIt(LI); + BasicBlock::iterator BBIt(LoadI); bool IsLoadCSE; if (Value *AvailableVal = FindAvailableLoadedValue( - LI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) { + LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) { // If the value of the load is locally available within the block, just use // it. This frequently occurs for reg2mem'd allocas. if (IsLoadCSE) { - LoadInst *NLI = cast<LoadInst>(AvailableVal); - combineMetadataForCSE(NLI, LI); + LoadInst *NLoadI = cast<LoadInst>(AvailableVal); + combineMetadataForCSE(NLoadI, LoadI); }; // If the returned value is the load itself, replace with an undef. This can // only happen in dead loops. - if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); - if (AvailableVal->getType() != LI->getType()) - AvailableVal = - CastInst::CreateBitOrPointerCast(AvailableVal, LI->getType(), "", LI); - LI->replaceAllUsesWith(AvailableVal); - LI->eraseFromParent(); + if (AvailableVal == LoadI) + AvailableVal = UndefValue::get(LoadI->getType()); + if (AvailableVal->getType() != LoadI->getType()) + AvailableVal = CastInst::CreateBitOrPointerCast( + AvailableVal, LoadI->getType(), "", LoadI); + LoadI->replaceAllUsesWith(AvailableVal); + LoadI->eraseFromParent(); return true; } @@ -1263,7 +1320,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If all of the loads and stores that feed the value have the same AA tags, // then we can propagate them onto any newly inserted loads. AAMDNodes AATags; - LI->getAAMetadata(AATags); + LoadI->getAAMetadata(AATags); SmallPtrSet<BasicBlock*, 8> PredsScanned; @@ -1285,16 +1342,17 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { Value *PredAvailable = nullptr; // NOTE: We don't CSE load that is volatile or anything stronger than // unordered, that should have been checked when we entered the function. - assert(LI->isUnordered() && "Attempting to CSE volatile or atomic loads"); + assert(LoadI->isUnordered() && + "Attempting to CSE volatile or atomic loads"); // If this is a load on a phi pointer, phi-translate it and search // for available load/store to the pointer in predecessors. Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB); PredAvailable = FindAvailablePtrLoadStore( - Ptr, LI->getType(), LI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan, - AA, &IsLoadCSE, &NumScanedInst); + Ptr, LoadI->getType(), LoadI->isAtomic(), PredBB, BBIt, + DefMaxInstsToScan, AA, &IsLoadCSE, &NumScanedInst); // If PredBB has a single predecessor, continue scanning through the - // single precessor. + // single predecessor. BasicBlock *SinglePredBB = PredBB; while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() && NumScanedInst < DefMaxInstsToScan) { @@ -1302,7 +1360,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (SinglePredBB) { BBIt = SinglePredBB->end(); PredAvailable = FindAvailablePtrLoadStore( - Ptr, LI->getType(), LI->isAtomic(), SinglePredBB, BBIt, + Ptr, LoadI->getType(), LoadI->isAtomic(), SinglePredBB, BBIt, (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE, &NumScanedInst); } @@ -1334,15 +1392,15 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If the value is unavailable in one of predecessors, we will end up // inserting a new instruction into them. It is only valid if all the - // instructions before LI are guaranteed to pass execution to its successor, - // or if LI is safe to speculate. + // instructions before LoadI are guaranteed to pass execution to its + // successor, or if LoadI is safe to speculate. // TODO: If this logic becomes more complex, and we will perform PRE insertion // farther than to a predecessor, we need to reuse the code from GVN's PRE. // It requires domination tree analysis, so for this simple case it is an // overkill. if (PredsScanned.size() != AvailablePreds.size() && - !isSafeToSpeculativelyExecute(LI)) - for (auto I = LoadBB->begin(); &*I != LI; ++I) + !isSafeToSpeculativelyExecute(LoadI)) + for (auto I = LoadBB->begin(); &*I != LoadI; ++I) if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) return false; @@ -1381,11 +1439,12 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (UnavailablePred) { assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 && "Can't handle critical edge here!"); - LoadInst *NewVal = new LoadInst( - LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred), - LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(), - LI->getSyncScopeID(), UnavailablePred->getTerminator()); - NewVal->setDebugLoc(LI->getDebugLoc()); + LoadInst *NewVal = + new LoadInst(LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred), + LoadI->getName() + ".pr", false, LoadI->getAlignment(), + LoadI->getOrdering(), LoadI->getSyncScopeID(), + UnavailablePred->getTerminator()); + NewVal->setDebugLoc(LoadI->getDebugLoc()); if (AATags) NewVal->setAAMetadata(AATags); @@ -1398,10 +1457,10 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Create a PHI node at the start of the block for the PRE'd load value. pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB); - PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", + PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "", &LoadBB->front()); - PN->takeName(LI); - PN->setDebugLoc(LI->getDebugLoc()); + PN->takeName(LoadI); + PN->setDebugLoc(LoadI->getDebugLoc()); // Insert new entries into the PHI for each predecessor. A single block may // have multiple entries here. @@ -1419,19 +1478,19 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // AvailablePreds vector as we go so that all of the PHI entries for this // predecessor use the same bitcast. Value *&PredV = I->second; - if (PredV->getType() != LI->getType()) - PredV = CastInst::CreateBitOrPointerCast(PredV, LI->getType(), "", + if (PredV->getType() != LoadI->getType()) + PredV = CastInst::CreateBitOrPointerCast(PredV, LoadI->getType(), "", P->getTerminator()); PN->addIncoming(PredV, I->first); } - for (LoadInst *PredLI : CSELoads) { - combineMetadataForCSE(PredLI, LI); + for (LoadInst *PredLoadI : CSELoads) { + combineMetadataForCSE(PredLoadI, LoadI); } - LI->replaceAllUsesWith(PN); - LI->eraseFromParent(); + LoadI->replaceAllUsesWith(PN); + LoadI->eraseFromParent(); return true; } @@ -1454,6 +1513,9 @@ FindMostPopularDest(BasicBlock *BB, if (PredToDest.second) DestPopularity[PredToDest.second]++; + if (DestPopularity.empty()) + return nullptr; + // Find the most popular dest. DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); BasicBlock *MostPopularDest = DPI->first; @@ -1513,12 +1575,12 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, assert(!PredValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); - DEBUG(dbgs() << "IN BB: " << *BB; - for (const auto &PredValue : PredValues) { - dbgs() << " BB '" << BB->getName() << "': FOUND condition = " - << *PredValue.first - << " for pred '" << PredValue.second->getName() << "'.\n"; - }); + LLVM_DEBUG(dbgs() << "IN BB: " << *BB; + for (const auto &PredValue : PredValues) { + dbgs() << " BB '" << BB->getName() + << "': FOUND condition = " << *PredValue.first + << " for pred '" << PredValue.second->getName() << "'.\n"; + }); // Decide what we want to thread through. Convert our list of known values to // a list of known destinations for each pred. This also discards duplicate @@ -1588,20 +1650,24 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // not thread. By doing so, we do not need to duplicate the current block and // also miss potential opportunities in case we dont/cant duplicate. if (OnlyDest && OnlyDest != MultipleDestSentinel) { - if (PredWithKnownDest == - (size_t)std::distance(pred_begin(BB), pred_end(BB))) { + if (PredWithKnownDest == (size_t)pred_size(BB)) { bool SeenFirstBranchToOnlyDest = false; + std::vector <DominatorTree::UpdateType> Updates; + Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1); for (BasicBlock *SuccBB : successors(BB)) { - if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) + if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) { SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. - else + } else { SuccBB->removePredecessor(BB, true); // This is unreachable successor. + Updates.push_back({DominatorTree::Delete, BB, SuccBB}); + } } // Finally update the terminator. TerminatorInst *Term = BB->getTerminator(); BranchInst::Create(OnlyDest, Term); Term->eraseFromParent(); + DDT->applyUpdates(Updates); // If the condition is now dead due to the removal of the old terminator, // erase it. @@ -1629,8 +1695,20 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // threadable destination (the common case) we can avoid this. BasicBlock *MostPopularDest = OnlyDest; - if (MostPopularDest == MultipleDestSentinel) + if (MostPopularDest == MultipleDestSentinel) { + // Remove any loop headers from the Dest list, ThreadEdge conservatively + // won't process them, but we might have other destination that are eligible + // and we still want to process. + erase_if(PredToDestList, + [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) { + return LoopHeaders.count(PredToDest.second) != 0; + }); + + if (PredToDestList.empty()) + return false; + MostPopularDest = FindMostPopularDest(BB, PredToDestList); + } // Now that we know what the most popular destination is, factor all // predecessors that will jump to it into a single predecessor. @@ -1800,11 +1878,10 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, DenseMap<Instruction*, Value*> &ValueMap) { - for (BasicBlock::iterator PNI = PHIBB->begin(); - PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) { + for (PHINode &PN : PHIBB->phis()) { // Ok, we have a PHI node. Figure out what the incoming value was for the // DestBlock. - Value *IV = PN->getIncomingValueForBlock(OldPred); + Value *IV = PN.getIncomingValueForBlock(OldPred); // Remap the value if necessary. if (Instruction *Inst = dyn_cast<Instruction>(IV)) { @@ -1813,7 +1890,7 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, IV = I->second; } - PN->addIncoming(IV, NewPred); + PN.addIncoming(IV, NewPred); } } @@ -1825,15 +1902,15 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, BasicBlock *SuccBB) { // If threading to the same block as we come from, we would infinite loop. if (SuccBB == BB) { - DEBUG(dbgs() << " Not threading across BB '" << BB->getName() - << "' - would thread to self!\n"); + LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName() + << "' - would thread to self!\n"); return false; } // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) { - DEBUG({ + LLVM_DEBUG({ bool BBIsHeader = LoopHeaders.count(BB); bool SuccIsHeader = LoopHeaders.count(SuccBB); dbgs() << " Not threading across " @@ -1847,8 +1924,8 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); if (JumpThreadCost > BBDupThreshold) { - DEBUG(dbgs() << " Not threading BB '" << BB->getName() - << "' - Cost is too high: " << JumpThreadCost << "\n"); + LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName() + << "' - Cost is too high: " << JumpThreadCost << "\n"); return false; } @@ -1857,17 +1934,21 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, if (PredBBs.size() == 1) PredBB = PredBBs[0]; else { - DEBUG(dbgs() << " Factoring out " << PredBBs.size() - << " common predecessors.\n"); + LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size() + << " common predecessors.\n"); PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm"); } // And finally, do it! - DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '" - << SuccBB->getName() << "' with cost: " << JumpThreadCost - << ", across block:\n " - << *BB << "\n"); - + LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() + << "' to '" << SuccBB->getName() + << "' with cost: " << JumpThreadCost + << ", across block:\n " << *BB << "\n"); + + if (DDT->pending()) + LVI->disableDT(); + else + LVI->enableDT(); LVI->threadEdge(PredBB, BB, SuccBB); // We are going to have to map operands from the original BB block to the new @@ -1917,15 +1998,32 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, // PHI nodes for NewBB now. AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping); + // Update the terminator of PredBB to jump to NewBB instead of BB. This + // eliminates predecessors from BB, which requires us to simplify any PHI + // nodes in BB. + TerminatorInst *PredTerm = PredBB->getTerminator(); + for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) + if (PredTerm->getSuccessor(i) == BB) { + BB->removePredecessor(PredBB, true); + PredTerm->setSuccessor(i, NewBB); + } + + // Enqueue required DT updates. + DDT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}, + {DominatorTree::Insert, PredBB, NewBB}, + {DominatorTree::Delete, PredBB, BB}}); + // If there were values defined in BB that are used outside the block, then we // now have to update all uses of the value to use either the original value, // the cloned value, or some PHI derived value. This can require arbitrary // PHI insertion, of which we are prepared to do, clean these up now. SSAUpdater SSAUpdate; SmallVector<Use*, 16> UsesToRename; + for (Instruction &I : *BB) { - // Scan all uses of this instruction to see if it is used outside of its - // block, and if so, record them in UsesToRename. + // Scan all uses of this instruction to see if their uses are no longer + // dominated by the previous def and if so, record them in UsesToRename. + // Also, skip phi operands from PredBB - we'll remove them anyway. for (Use &U : I.uses()) { Instruction *User = cast<Instruction>(U.getUser()); if (PHINode *UserPN = dyn_cast<PHINode>(User)) { @@ -1940,8 +2038,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, // If there are no uses outside the block, we're done with this instruction. if (UsesToRename.empty()) continue; - - DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); + LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks @@ -1952,19 +2049,9 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); } - // Ok, NewBB is good to go. Update the terminator of PredBB to jump to - // NewBB instead of BB. This eliminates predecessors from BB, which requires - // us to simplify any PHI nodes in BB. - TerminatorInst *PredTerm = PredBB->getTerminator(); - for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) - if (PredTerm->getSuccessor(i) == BB) { - BB->removePredecessor(PredBB, true); - PredTerm->setSuccessor(i, NewBB); - } - // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. @@ -1984,20 +2071,42 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, const char *Suffix) { + SmallVector<BasicBlock *, 2> NewBBs; + // Collect the frequencies of all predecessors of BB, which will be used to - // update the edge weight on BB->SuccBB. - BlockFrequency PredBBFreq(0); + // update the edge weight of the result of splitting predecessors. + DenseMap<BasicBlock *, BlockFrequency> FreqMap; if (HasProfileData) for (auto Pred : Preds) - PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB); + FreqMap.insert(std::make_pair( + Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB))); + + // In the case when BB is a LandingPad block we create 2 new predecessors + // instead of just one. + if (BB->isLandingPad()) { + std::string NewName = std::string(Suffix) + ".split-lp"; + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs); + } else { + NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix)); + } - BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix); + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve((2 * Preds.size()) + NewBBs.size()); + for (auto NewBB : NewBBs) { + BlockFrequency NewBBFreq(0); + Updates.push_back({DominatorTree::Insert, NewBB, BB}); + for (auto Pred : predecessors(NewBB)) { + Updates.push_back({DominatorTree::Delete, Pred, BB}); + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + if (HasProfileData) // Update frequencies between Pred -> NewBB. + NewBBFreq += FreqMap.lookup(Pred); + } + if (HasProfileData) // Apply the summed frequency to NewBB. + BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency()); + } - // Set the block frequency of the newly created PredBB, which is the sum of - // frequencies of Preds. - if (HasProfileData) - BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency()); - return PredBB; + DDT->applyUpdates(Updates); + return NewBBs[0]; } bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) { @@ -2126,42 +2235,49 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // cause us to transform this into an irreducible loop, don't do this. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) { - DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName() - << "' into predecessor block '" << PredBBs[0]->getName() - << "' - it might create an irreducible loop!\n"); + LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName() + << "' into predecessor block '" << PredBBs[0]->getName() + << "' - it might create an irreducible loop!\n"); return false; } unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); if (DuplicationCost > BBDupThreshold) { - DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() - << "' - Cost is too high: " << DuplicationCost << "\n"); + LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() + << "' - Cost is too high: " << DuplicationCost << "\n"); return false; } // And finally, do it! Start by factoring the predecessors if needed. + std::vector<DominatorTree::UpdateType> Updates; BasicBlock *PredBB; if (PredBBs.size() == 1) PredBB = PredBBs[0]; else { - DEBUG(dbgs() << " Factoring out " << PredBBs.size() - << " common predecessors.\n"); + LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size() + << " common predecessors.\n"); PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm"); } + Updates.push_back({DominatorTree::Delete, PredBB, BB}); // Okay, we decided to do this! Clone all the instructions in BB onto the end // of PredBB. - DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '" - << PredBB->getName() << "' to eliminate branch on phi. Cost: " - << DuplicationCost << " block is:" << *BB << "\n"); + LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName() + << "' into end of '" << PredBB->getName() + << "' to eliminate branch on phi. Cost: " + << DuplicationCost << " block is:" << *BB << "\n"); // Unless PredBB ends with an unconditional branch, split the edge so that we // can just clone the bits from BB into the end of the new PredBB. BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator()); if (!OldPredBranch || !OldPredBranch->isUnconditional()) { - PredBB = SplitEdge(PredBB, BB); + BasicBlock *OldPredBB = PredBB; + PredBB = SplitEdge(OldPredBB, BB); + Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB}); + Updates.push_back({DominatorTree::Insert, PredBB, BB}); + Updates.push_back({DominatorTree::Delete, OldPredBB, BB}); OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); } @@ -2203,6 +2319,10 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // Otherwise, insert the new instruction into the block. New->setName(BI->getName()); PredBB->getInstList().insert(OldPredBranch->getIterator(), New); + // Update Dominance from simplified New instruction operands. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i))) + Updates.push_back({DominatorTree::Insert, PredBB, SuccBB}); } } @@ -2238,7 +2358,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( if (UsesToRename.empty()) continue; - DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); + LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks @@ -2249,7 +2369,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); } // PredBB no longer jumps to BB, remove entries in the PHI node for the edge @@ -2258,6 +2378,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // Remove the unconditional branch at the end of the PredBB block. OldPredBranch->eraseFromParent(); + DDT->applyUpdates(Updates); ++NumDupes; return true; @@ -2300,6 +2421,10 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { // Now check if one of the select values would allow us to constant fold the // terminator in BB. We don't do the transform if both sides fold, those // cases will be threaded in any case. + if (DDT->pending()) + LVI->disableDT(); + else + LVI->enableDT(); LazyValueInfo::Tristate LHSFolds = LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1), CondRHS, Pred, BB, CondCmp); @@ -2330,6 +2455,8 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { // The select is now dead. SI->eraseFromParent(); + DDT->applyUpdates({{DominatorTree::Insert, NewBB, BB}, + {DominatorTree::Insert, Pred, NewBB}}); // Update any other PHI nodes in BB. for (BasicBlock::iterator BI = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(BI); ++BI) @@ -2395,7 +2522,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { break; } } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) { - // Look for a Select in BB that uses PN as condtion. + // Look for a Select in BB that uses PN as condition. if (isUnfoldCandidate(SelectI, U.get())) { SI = SelectI; break; @@ -2408,11 +2535,25 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { // Expand the select. TerminatorInst *Term = SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + BasicBlock *SplitBB = SI->getParent(); + BasicBlock *NewBB = Term->getParent(); PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); NewPN->addIncoming(SI->getFalseValue(), BB); SI->replaceAllUsesWith(NewPN); SI->eraseFromParent(); + // NewBB and SplitBB are newly created blocks which require insertion. + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3); + Updates.push_back({DominatorTree::Insert, BB, SplitBB}); + Updates.push_back({DominatorTree::Insert, BB, NewBB}); + Updates.push_back({DominatorTree::Insert, NewBB, SplitBB}); + // BB's successors were moved to SplitBB, update DDT accordingly. + for (auto *Succ : successors(SplitBB)) { + Updates.push_back({DominatorTree::Delete, BB, Succ}); + Updates.push_back({DominatorTree::Insert, SplitBB, Succ}); + } + DDT->applyUpdates(Updates); return true; } return false; @@ -2499,8 +2640,8 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, if (!TrueDestIsSafe && !FalseDestIsSafe) return false; - BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest; - BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest; + BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest; + BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest; ValueToValueMapTy UnguardedMapping, GuardedMapping; Instruction *AfterGuard = Guard->getNextNode(); @@ -2509,18 +2650,29 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, return false; // Duplicate all instructions before the guard and the guard itself to the // branch where implication is not proved. - GuardedBlock = DuplicateInstructionsInSplitBetween( - BB, GuardedBlock, AfterGuard, GuardedMapping); + BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween( + BB, PredGuardedBlock, AfterGuard, GuardedMapping); assert(GuardedBlock && "Could not create the guarded block?"); // Duplicate all instructions before the guard in the unguarded branch. // Since we have successfully duplicated the guarded block and this block // has fewer instructions, we expect it to succeed. - UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock, - Guard, UnguardedMapping); + BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween( + BB, PredUnguardedBlock, Guard, UnguardedMapping); assert(UnguardedBlock && "Could not create the unguarded block?"); - DEBUG(dbgs() << "Moved guard " << *Guard << " to block " - << GuardedBlock->getName() << "\n"); - + LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block " + << GuardedBlock->getName() << "\n"); + // DuplicateInstructionsInSplitBetween inserts a new block "BB.split" between + // PredBB and BB. We need to perform two inserts and one delete for each of + // the above calls to update Dominators. + DDT->applyUpdates( + {// Guarded block split. + {DominatorTree::Delete, PredGuardedBlock, BB}, + {DominatorTree::Insert, PredGuardedBlock, GuardedBlock}, + {DominatorTree::Insert, GuardedBlock, BB}, + // Unguarded block split. + {DominatorTree::Delete, PredUnguardedBlock, BB}, + {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock}, + {DominatorTree::Insert, UnguardedBlock, BB}}); // Some instructions before the guard may still have uses. For them, we need // to create Phi nodes merging their copies in both guarded and unguarded // branches. Those instructions that have no uses can be just removed. |