diff options
Diffstat (limited to 'lib/Transforms/Scalar/SimpleLoopUnswitch.cpp')
-rw-r--r-- | lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 653 |
1 files changed, 528 insertions, 125 deletions
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 5834b619046b..5a67178cef37 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -19,11 +19,14 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/Utils/Local.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -59,7 +62,11 @@ using namespace llvm; STATISTIC(NumBranches, "Number of branches unswitched"); STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumGuards, "Number of guards turned into branches for unswitching"); STATISTIC(NumTrivial, "Number of unswitches that are trivial"); +STATISTIC( + NumCostMultiplierSkipped, + "Number of unswitch candidates that had their cost multiplier skipped"); static cl::opt<bool> EnableNonTrivialUnswitch( "enable-nontrivial-unswitch", cl::init(false), cl::Hidden, @@ -70,6 +77,22 @@ static cl::opt<int> UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop.")); +static cl::opt<bool> EnableUnswitchCostMultiplier( + "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, + cl::desc("Enable unswitch cost multiplier that prohibits exponential " + "explosion in nontrivial unswitch.")); +static cl::opt<int> UnswitchSiblingsToplevelDiv( + "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, + cl::desc("Toplevel siblings divisor for cost multiplier.")); +static cl::opt<int> UnswitchNumInitialUnscaledCandidates( + "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, + cl::desc("Number of unswitch candidates that are ignored when calculating " + "cost multiplier.")); +static cl::opt<bool> UnswitchGuards( + "simple-loop-unswitch-guards", cl::init(true), cl::Hidden, + cl::desc("If enabled, simple loop unswitching will also consider " + "llvm.experimental.guard intrinsics as unswitch candidates.")); + /// Collect all of the loop invariant input values transitively used by the /// homogeneous instruction graph from a given root. /// @@ -302,10 +325,11 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, formLCSSA(*OldContainingL, DT, &LI, nullptr); // We shouldn't need to form dedicated exits because the exit introduced - // here is the (just split by unswitching) preheader. As such, it is - // necessarily dedicated. - assert(OldContainingL->hasDedicatedExits() && - "Unexpected predecessor of hoisted loop preheader!"); + // here is the (just split by unswitching) preheader. However, after trivial + // unswitching it is possible to get new non-dedicated exits out of parent + // loop so let's conservatively form dedicated exit blocks and figure out + // if we can optimize later. + formDedicatedExitBlocks(OldContainingL, &DT, &LI, /*PreserveLCSSA*/ true); } } @@ -327,7 +351,8 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, /// If `SE` is not null, it will be updated based on the potential loop SCEVs /// invalidated by this. static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, - LoopInfo &LI, ScalarEvolution *SE) { + LoopInfo &LI, ScalarEvolution *SE, + MemorySSAUpdater *MSSAU) { assert(BI.isConditional() && "Can only unswitch a conditional branch!"); LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n"); @@ -401,11 +426,14 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, SE->forgetTopmostLoop(&L); } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + // Split the preheader, so that we know that there is a safe place to insert // the conditional branch. We will change the preheader to have a conditional // branch on LoopCond. BasicBlock *OldPH = L.getLoopPreheader(); - BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU); // Now that we have a place to insert the conditional branch, create a place // to branch to: this is the exit block out of the loop that we are @@ -417,9 +445,13 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, "A branch's parent isn't a predecessor!"); UnswitchedBB = LoopExitBB; } else { - UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); + UnswitchedBB = + SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU); } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + // Actually move the invariant uses into the unswitched position. If possible, // we do this by moving the instructions, but when doing partial unswitching // we do it by building a new merge of the values in the unswitched position. @@ -430,12 +462,17 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, // its successors. OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(), BI); + if (MSSAU) { + // Temporarily clone the terminator, to make MSSA update cheaper by + // separating "insert edge" updates from "remove edge" ones. + ParentBB->getInstList().push_back(BI.clone()); + } else { + // Create a new unconditional branch that will continue the loop as a new + // terminator. + BranchInst::Create(ContinueBB, ParentBB); + } BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); - - // Create a new unconditional branch that will continue the loop as a new - // terminator. - BranchInst::Create(ContinueBB, ParentBB); } else { // Only unswitching a subset of inputs to the condition, so we will need to // build a new branch that merges the invariant inputs. @@ -451,6 +488,32 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, *UnswitchedBB, *NewPH); } + // Update the dominator tree with the added edge. + DT.insertEdge(OldPH, UnswitchedBB); + + // After the dominator tree was updated with the added edge, update MemorySSA + // if available. + if (MSSAU) { + SmallVector<CFGUpdate, 1> Updates; + Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB}); + MSSAU->applyInsertUpdates(Updates, DT); + } + + // Finish updating dominator tree and memory ssa for full unswitch. + if (FullUnswitch) { + if (MSSAU) { + // Remove the cloned branch instruction. + ParentBB->getTerminator()->eraseFromParent(); + // Create unconditional branch now. + BranchInst::Create(ContinueBB, ParentBB); + MSSAU->removeEdge(ParentBB, LoopExitBB); + } + DT.deleteEdge(ParentBB, LoopExitBB); + } + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + // Rewrite the relevant PHI nodes. if (UnswitchedBB == LoopExitBB) rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH); @@ -458,13 +521,6 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB, *ParentBB, *OldPH, FullUnswitch); - // Now we need to update the dominator tree. - SmallVector<DominatorTree::UpdateType, 2> DTUpdates; - DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB}); - if (FullUnswitch) - DTUpdates.push_back({DT.Delete, ParentBB, LoopExitBB}); - DT.applyUpdates(DTUpdates); - // The constant we can replace all of our invariants with inside the loop // body. If any of the invariants have a value other than this the loop won't // be entered. @@ -482,6 +538,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, if (FullUnswitch) hoistLoopToNewParent(L, *NewPH, DT, LI); + LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n"); ++NumTrivial; ++NumBranches; return true; @@ -514,7 +571,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, /// If `SE` is not null, it will be updated based on the potential loop SCEVs /// invalidated by this. static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, - LoopInfo &LI, ScalarEvolution *SE) { + LoopInfo &LI, ScalarEvolution *SE, + MemorySSAUpdater *MSSAU) { LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n"); Value *LoopCond = SI.getCondition(); @@ -539,7 +597,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, else if (ExitCaseIndices.empty()) return false; - LLVM_DEBUG(dbgs() << " unswitching trivial cases...\n"); + LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n"); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); // We may need to invalidate SCEVs for the outermost loop reached by any of // the exits. @@ -603,7 +664,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, // Split the preheader, so that we know that there is a safe place to insert // the switch. BasicBlock *OldPH = L.getLoopPreheader(); - BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU); OldPH->getTerminator()->eraseFromParent(); // Now add the unswitched switch. @@ -626,9 +687,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH); } else { auto *SplitBB = - SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); - rewritePHINodesForExitAndUnswitchedBlocks( - *DefaultExitBB, *SplitBB, *ParentBB, *OldPH, /*FullUnswitch*/ true); + SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU); + rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB, + *ParentBB, *OldPH, + /*FullUnswitch*/ true); DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; } } @@ -652,9 +714,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; if (!SplitExitBB) { // If this is the first time we see this, do the split and remember it. - SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); - rewritePHINodesForExitAndUnswitchedBlocks( - *ExitBB, *SplitExitBB, *ParentBB, *OldPH, /*FullUnswitch*/ true); + SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU); + rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB, + *ParentBB, *OldPH, + /*FullUnswitch*/ true); } // Update the case pair to point to the split block. CasePair.second = SplitExitBB; @@ -731,6 +794,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB}); } DT.applyUpdates(DTUpdates); + + if (MSSAU) { + MSSAU->applyUpdates(DTUpdates, DT); + if (VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + } + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); // We may have changed the nesting relationship for this loop so hoist it to @@ -739,6 +809,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, ++NumTrivial; ++NumSwitches; + LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n"); return true; } @@ -755,7 +826,8 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, /// If `SE` is not null, it will be updated based on the potential loop SCEVs /// invalidated by this. static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, - LoopInfo &LI, ScalarEvolution *SE) { + LoopInfo &LI, ScalarEvolution *SE, + MemorySSAUpdater *MSSAU) { bool Changed = false; // If loop header has only one reachable successor we should keep looking for @@ -780,7 +852,7 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, [](Instruction &I) { return I.mayHaveSideEffects(); })) return Changed; - TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); + Instruction *CurrentTerm = CurrentBB->getTerminator(); if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) { // Don't bother trying to unswitch past a switch with a constant @@ -789,7 +861,7 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, if (isa<Constant>(SI->getCondition())) return Changed; - if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE)) + if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU)) // Couldn't unswitch this one so we're done. return Changed; @@ -821,7 +893,7 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, // Found a trivial condition candidate: non-foldable conditional branch. If // we fail to unswitch this, we can't do anything else that is trivial. - if (!unswitchTrivialBranch(L, *BI, DT, LI, SE)) + if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU)) return Changed; // Mark that we managed to unswitch something. @@ -874,7 +946,7 @@ static BasicBlock *buildClonedLoopBlocks( const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC, - DominatorTree &DT, LoopInfo &LI) { + DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) { SmallVector<BasicBlock *, 4> NewBlocks; NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size()); @@ -919,7 +991,7 @@ static BasicBlock *buildClonedLoopBlocks( // place to merge the CFG, so split the exit first. This is always safe to // do because there cannot be any non-loop predecessors of a loop exit in // loop simplified form. - auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); + auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU); // Rearrange the names to make it easier to write test cases by having the // exit block carry the suffix rather than the merge block carrying the @@ -1262,11 +1334,10 @@ static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks, // matter as we're just trying to build up the map from inside-out; we use // the map in a more stably ordered way below. auto OrderedClonedExitsInLoops = ClonedExitsInLoops; - llvm::sort(OrderedClonedExitsInLoops.begin(), OrderedClonedExitsInLoops.end(), - [&](BasicBlock *LHS, BasicBlock *RHS) { - return ExitLoopMap.lookup(LHS)->getLoopDepth() < - ExitLoopMap.lookup(RHS)->getLoopDepth(); - }); + llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) { + return ExitLoopMap.lookup(LHS)->getLoopDepth() < + ExitLoopMap.lookup(RHS)->getLoopDepth(); + }); // Populate the existing ExitLoopMap with everything reachable from each // exit, starting from the inner most exit. @@ -1351,7 +1422,7 @@ static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks, static void deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, - DominatorTree &DT) { + DominatorTree &DT, MemorySSAUpdater *MSSAU) { // Find all the dead clones, and remove them from their successors. SmallVector<BasicBlock *, 16> DeadBlocks; for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks)) @@ -1363,6 +1434,13 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, DeadBlocks.push_back(ClonedBB); } + // Remove all MemorySSA in the dead blocks + if (MSSAU) { + SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(), + DeadBlocks.end()); + MSSAU->removeBlocks(DeadBlockSet); + } + // Drop any remaining references to break cycles. for (BasicBlock *BB : DeadBlocks) BB->dropAllReferences(); @@ -1371,21 +1449,33 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, BB->eraseFromParent(); } -static void -deleteDeadBlocksFromLoop(Loop &L, - SmallVectorImpl<BasicBlock *> &ExitBlocks, - DominatorTree &DT, LoopInfo &LI) { - // Find all the dead blocks, and remove them from their successors. - SmallVector<BasicBlock *, 16> DeadBlocks; - for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks)) - if (!DT.isReachableFromEntry(BB)) { - for (BasicBlock *SuccBB : successors(BB)) +static void deleteDeadBlocksFromLoop(Loop &L, + SmallVectorImpl<BasicBlock *> &ExitBlocks, + DominatorTree &DT, LoopInfo &LI, + MemorySSAUpdater *MSSAU) { + // Find all the dead blocks tied to this loop, and remove them from their + // successors. + SmallPtrSet<BasicBlock *, 16> DeadBlockSet; + + // Start with loop/exit blocks and get a transitive closure of reachable dead + // blocks. + SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(), + ExitBlocks.end()); + DeathCandidates.append(L.blocks().begin(), L.blocks().end()); + while (!DeathCandidates.empty()) { + auto *BB = DeathCandidates.pop_back_val(); + if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) { + for (BasicBlock *SuccBB : successors(BB)) { SuccBB->removePredecessor(BB); - DeadBlocks.push_back(BB); + DeathCandidates.push_back(SuccBB); + } + DeadBlockSet.insert(BB); } + } - SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(), - DeadBlocks.end()); + // Remove all MemorySSA in the dead blocks + if (MSSAU) + MSSAU->removeBlocks(DeadBlockSet); // Filter out the dead blocks from the exit blocks list so that it can be // used in the caller. @@ -1394,7 +1484,7 @@ deleteDeadBlocksFromLoop(Loop &L, // Walk from this loop up through its parents removing all of the dead blocks. for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) { - for (auto *BB : DeadBlocks) + for (auto *BB : DeadBlockSet) ParentL->getBlocksSet().erase(BB); llvm::erase_if(ParentL->getBlocksVector(), [&](BasicBlock *BB) { return DeadBlockSet.count(BB); }); @@ -1419,7 +1509,7 @@ deleteDeadBlocksFromLoop(Loop &L, // Remove the loop mappings for the dead blocks and drop all the references // from these blocks to others to handle cyclic references as we start // deleting the blocks themselves. - for (auto *BB : DeadBlocks) { + for (auto *BB : DeadBlockSet) { // Check that the dominator tree has already been updated. assert(!DT.getNode(BB) && "Should already have cleared domtree!"); LI.changeLoopFor(BB, nullptr); @@ -1428,7 +1518,7 @@ deleteDeadBlocksFromLoop(Loop &L, // Actually delete the blocks now that they've been fully unhooked from the // IR. - for (auto *BB : DeadBlocks) + for (auto *BB : DeadBlockSet) BB->eraseFromParent(); } @@ -1782,11 +1872,11 @@ void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) { } while (!DomWorklist.empty()); } -static bool unswitchNontrivialInvariants( - Loop &L, TerminatorInst &TI, ArrayRef<Value *> Invariants, - DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, - function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB, - ScalarEvolution *SE) { +static void unswitchNontrivialInvariants( + Loop &L, Instruction &TI, ArrayRef<Value *> Invariants, + SmallVectorImpl<BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI, + AssumptionCache &AC, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB, + ScalarEvolution *SE, MemorySSAUpdater *MSSAU) { auto *ParentBB = TI.getParent(); BranchInst *BI = dyn_cast<BranchInst>(&TI); SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI); @@ -1803,6 +1893,9 @@ static bool unswitchNontrivialInvariants( assert(isa<Instruction>(BI->getCondition()) && "Partial unswitching requires an instruction as the condition!"); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + // Constant and BBs tracking the cloned and continuing successor. When we are // unswitching the entire condition, this can just be trivially chosen to // unswitch towards `true`. However, when we are unswitching a set of @@ -1841,19 +1934,12 @@ static bool unswitchNontrivialInvariants( // whatever reason). assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!"); - SmallVector<BasicBlock *, 4> ExitBlocks; - L.getUniqueExitBlocks(ExitBlocks); - - // We cannot unswitch if exit blocks contain a cleanuppad instruction as we - // don't know how to split those exit blocks. - // FIXME: We should teach SplitBlock to handle this and remove this - // restriction. - for (auto *ExitBB : ExitBlocks) - if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) - return false; - // Compute the parent loop now before we start hacking on things. Loop *ParentL = L.getParentLoop(); + // Get blocks in RPO order for MSSA update, before changing the CFG. + LoopBlocksRPO LBRPO(&L); + if (MSSAU) + LBRPO.perform(&LI); // Compute the outer-most loop containing one of our exit blocks. This is the // furthest up our loopnest which can be mutated, which we will use below to @@ -1903,7 +1989,7 @@ static bool unswitchNontrivialInvariants( // between the unswitched versions, and we will have a new preheader for the // original loop. BasicBlock *SplitBB = L.getLoopPreheader(); - BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI); + BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU); // Keep track of the dominator tree updates needed. SmallVector<DominatorTree::UpdateType, 4> DTUpdates; @@ -1916,7 +2002,7 @@ static bool unswitchNontrivialInvariants( VMaps.emplace_back(new ValueToValueMapTy()); ClonedPHs[SuccBB] = buildClonedLoopBlocks( L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB, - DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI); + DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU); } // The stitching of the branched code back together depends on whether we're @@ -1924,7 +2010,63 @@ static bool unswitchNontrivialInvariants( // nuke the initial terminator placed in the split block. SplitBB->getTerminator()->eraseFromParent(); if (FullUnswitch) { - // First we need to unhook the successor relationship as we'll be replacing + // Splice the terminator from the original loop and rewrite its + // successors. + SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI); + + // Keep a clone of the terminator for MSSA updates. + Instruction *NewTI = TI.clone(); + ParentBB->getInstList().push_back(NewTI); + + // First wire up the moved terminator to the preheaders. + if (BI) { + BasicBlock *ClonedPH = ClonedPHs.begin()->second; + BI->setSuccessor(ClonedSucc, ClonedPH); + BI->setSuccessor(1 - ClonedSucc, LoopPH); + DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); + } else { + assert(SI && "Must either be a branch or switch!"); + + // Walk the cases and directly update their successors. + assert(SI->getDefaultDest() == RetainedSuccBB && + "Not retaining default successor!"); + SI->setDefaultDest(LoopPH); + for (auto &Case : SI->cases()) + if (Case.getCaseSuccessor() == RetainedSuccBB) + Case.setSuccessor(LoopPH); + else + Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); + + // We need to use the set to populate domtree updates as even when there + // are multiple cases pointing at the same successor we only want to + // remove and insert one edge in the domtree. + for (BasicBlock *SuccBB : UnswitchedSuccBBs) + DTUpdates.push_back( + {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second}); + } + + if (MSSAU) { + DT.applyUpdates(DTUpdates); + DTUpdates.clear(); + + // Remove all but one edge to the retained block and all unswitched + // blocks. This is to avoid having duplicate entries in the cloned Phis, + // when we know we only keep a single edge for each case. + MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB); + for (BasicBlock *SuccBB : UnswitchedSuccBBs) + MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB); + + for (auto &VMap : VMaps) + MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap, + /*IgnoreIncomingWithNoClones=*/true); + MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT); + + // Remove all edges to unswitched blocks. + for (BasicBlock *SuccBB : UnswitchedSuccBBs) + MSSAU->removeEdge(ParentBB, SuccBB); + } + + // Now unhook the successor relationship as we'll be replacing // the terminator with a direct branch. This is much simpler for branches // than switches so we handle those first. if (BI) { @@ -1942,9 +2084,10 @@ static bool unswitchNontrivialInvariants( // is a duplicate edge to the retained successor as the retained successor // is always the default successor and as we'll replace this with a direct // branch we no longer need the duplicate entries in the PHI nodes. - assert(SI->getDefaultDest() == RetainedSuccBB && + SwitchInst *NewSI = cast<SwitchInst>(NewTI); + assert(NewSI->getDefaultDest() == RetainedSuccBB && "Not retaining default successor!"); - for (auto &Case : SI->cases()) + for (auto &Case : NewSI->cases()) Case.getCaseSuccessor()->removePredecessor( ParentBB, /*DontDeleteUselessPHIs*/ true); @@ -1956,34 +2099,8 @@ static bool unswitchNontrivialInvariants( DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB}); } - // Now that we've unhooked the successor relationship, splice the terminator - // from the original loop to the split. - SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI); - - // Now wire up the terminator to the preheaders. - if (BI) { - BasicBlock *ClonedPH = ClonedPHs.begin()->second; - BI->setSuccessor(ClonedSucc, ClonedPH); - BI->setSuccessor(1 - ClonedSucc, LoopPH); - DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); - } else { - assert(SI && "Must either be a branch or switch!"); - - // Walk the cases and directly update their successors. - SI->setDefaultDest(LoopPH); - for (auto &Case : SI->cases()) - if (Case.getCaseSuccessor() == RetainedSuccBB) - Case.setSuccessor(LoopPH); - else - Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); - - // We need to use the set to populate domtree updates as even when there - // are multiple cases pointing at the same successor we only want to - // remove and insert one edge in the domtree. - for (BasicBlock *SuccBB : UnswitchedSuccBBs) - DTUpdates.push_back( - {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second}); - } + // After MSSAU update, remove the cloned terminator instruction NewTI. + ParentBB->getTerminator()->eraseFromParent(); // Create a new unconditional branch to the continuing block (as opposed to // the one cloned). @@ -2002,12 +2119,19 @@ static bool unswitchNontrivialInvariants( // Apply the updates accumulated above to get an up-to-date dominator tree. DT.applyUpdates(DTUpdates); + if (!FullUnswitch && MSSAU) { + // Update MSSA for partial unswitch, after DT update. + SmallVector<CFGUpdate, 1> Updates; + Updates.push_back( + {cfg::UpdateKind::Insert, SplitBB, ClonedPHs.begin()->second}); + MSSAU->applyInsertUpdates(Updates, DT); + } // Now that we have an accurate dominator tree, first delete the dead cloned // blocks so that we can accurately build any cloned loops. It is important to // not delete the blocks from the original loop yet because we still want to // reference the original loop to understand the cloned loop's structure. - deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT); + deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU); // Build the cloned loop structure itself. This may be substantially // different from the original structure due to the simplified CFG. This also @@ -2019,10 +2143,17 @@ static bool unswitchNontrivialInvariants( // Now that our cloned loops have been built, we can update the original loop. // First we delete the dead blocks from it and then we rebuild the loop // structure taking these deletions into account. - deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI); + deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + SmallVector<Loop *, 4> HoistedLoops; bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + // This transformation has a high risk of corrupting the dominator tree, and // the below steps to rebuild loop structures will result in hard to debug // errors in that case so verify that the dominator tree is sane first. @@ -2038,6 +2169,18 @@ static bool unswitchNontrivialInvariants( assert(UnswitchedSuccBBs.size() == 1 && "Only one possible unswitched block for a branch!"); BasicBlock *ClonedPH = ClonedPHs.begin()->second; + + // When considering multiple partially-unswitched invariants + // we cant just go replace them with constants in both branches. + // + // For 'AND' we infer that true branch ("continue") means true + // for each invariant operand. + // For 'OR' we can infer that false branch ("continue") means false + // for each invariant operand. + // So it happens that for multiple-partial case we dont replace + // in the unswitched branch. + bool ReplaceUnswitched = FullUnswitch || (Invariants.size() == 1); + ConstantInt *UnswitchedReplacement = Direction ? ConstantInt::getTrue(BI->getContext()) : ConstantInt::getFalse(BI->getContext()); @@ -2057,7 +2200,8 @@ static bool unswitchNontrivialInvariants( // unswitched if in the cloned blocks. if (DT.dominates(LoopPH, UserI->getParent())) U->set(ContinueReplacement); - else if (DT.dominates(ClonedPH, UserI->getParent())) + else if (ReplaceUnswitched && + DT.dominates(ClonedPH, UserI->getParent())) U->set(UnswitchedReplacement); } } @@ -2134,8 +2278,13 @@ static bool unswitchNontrivialInvariants( SibLoops.push_back(UpdatedL); UnswitchCB(IsStillLoop, SibLoops); - ++NumBranches; - return true; + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + if (BI) + ++NumBranches; + else + ++NumSwitches; } /// Recursively compute the cost of a dominator subtree based on the per-block @@ -2171,19 +2320,208 @@ computeDomSubtreeCost(DomTreeNode &N, return Cost; } +/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch, +/// making the following replacement: +/// +/// --code before guard-- +/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] +/// --code after guard-- +/// +/// into +/// +/// --code before guard-- +/// br i1 %cond, label %guarded, label %deopt +/// +/// guarded: +/// --code after guard-- +/// +/// deopt: +/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ] +/// unreachable +/// +/// It also makes all relevant DT and LI updates, so that all structures are in +/// valid state after this transform. +static BranchInst * +turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, + SmallVectorImpl<BasicBlock *> &ExitBlocks, + DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) { + SmallVector<DominatorTree::UpdateType, 4> DTUpdates; + LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n"); + BasicBlock *CheckBB = GI->getParent(); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + // Remove all CheckBB's successors from DomTree. A block can be seen among + // successors more than once, but for DomTree it should be added only once. + SmallPtrSet<BasicBlock *, 4> Successors; + for (auto *Succ : successors(CheckBB)) + if (Successors.insert(Succ).second) + DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ}); + + Instruction *DeoptBlockTerm = + SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true); + BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator()); + // SplitBlockAndInsertIfThen inserts control flow that branches to + // DeoptBlockTerm if the condition is true. We want the opposite. + CheckBI->swapSuccessors(); + + BasicBlock *GuardedBlock = CheckBI->getSuccessor(0); + GuardedBlock->setName("guarded"); + CheckBI->getSuccessor(1)->setName("deopt"); + BasicBlock *DeoptBlock = CheckBI->getSuccessor(1); + + // We now have a new exit block. + ExitBlocks.push_back(CheckBI->getSuccessor(1)); + + if (MSSAU) + MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI); + + GI->moveBefore(DeoptBlockTerm); + GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext())); + + // Add new successors of CheckBB into DomTree. + for (auto *Succ : successors(CheckBB)) + DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ}); + + // Now the blocks that used to be CheckBB's successors are GuardedBlock's + // successors. + for (auto *Succ : Successors) + DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ}); + + // Make proper changes to DT. + DT.applyUpdates(DTUpdates); + // Inform LI of a new loop block. + L.addBasicBlockToLoop(GuardedBlock, LI); + + if (MSSAU) { + MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI)); + MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::End); + if (VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + } + + ++NumGuards; + return CheckBI; +} + +/// Cost multiplier is a way to limit potentially exponential behavior +/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch +/// candidates available. Also accounting for the number of "sibling" loops with +/// the idea to account for previous unswitches that already happened on this +/// cluster of loops. There was an attempt to keep this formula simple, +/// just enough to limit the worst case behavior. Even if it is not that simple +/// now it is still not an attempt to provide a detailed heuristic size +/// prediction. +/// +/// TODO: Make a proper accounting of "explosion" effect for all kinds of +/// unswitch candidates, making adequate predictions instead of wild guesses. +/// That requires knowing not just the number of "remaining" candidates but +/// also costs of unswitching for each of these candidates. +static int calculateUnswitchCostMultiplier( + Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT, + ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>> + UnswitchCandidates) { + + // Guards and other exiting conditions do not contribute to exponential + // explosion as soon as they dominate the latch (otherwise there might be + // another path to the latch remaining that does not allow to eliminate the + // loop copy on unswitch). + BasicBlock *Latch = L.getLoopLatch(); + BasicBlock *CondBlock = TI.getParent(); + if (DT.dominates(CondBlock, Latch) && + (isGuard(&TI) || + llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) { + return L.contains(SuccBB); + }) <= 1)) { + NumCostMultiplierSkipped++; + return 1; + } + + auto *ParentL = L.getParentLoop(); + int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size() + : std::distance(LI.begin(), LI.end())); + // Count amount of clones that all the candidates might cause during + // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases. + int UnswitchedClones = 0; + for (auto Candidate : UnswitchCandidates) { + Instruction *CI = Candidate.first; + BasicBlock *CondBlock = CI->getParent(); + bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch); + if (isGuard(CI)) { + if (!SkipExitingSuccessors) + UnswitchedClones++; + continue; + } + int NonExitingSuccessors = llvm::count_if( + successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) { + return !SkipExitingSuccessors || L.contains(SuccBB); + }); + UnswitchedClones += Log2_32(NonExitingSuccessors); + } + + // Ignore up to the "unscaled candidates" number of unswitch candidates + // when calculating the power-of-two scaling of the cost. The main idea + // with this control is to allow a small number of unswitches to happen + // and rely more on siblings multiplier (see below) when the number + // of candidates is small. + unsigned ClonesPower = + std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0); + + // Allowing top-level loops to spread a bit more than nested ones. + int SiblingsMultiplier = + std::max((ParentL ? SiblingsCount + : SiblingsCount / (int)UnswitchSiblingsToplevelDiv), + 1); + // Compute the cost multiplier in a way that won't overflow by saturating + // at an upper bound. + int CostMultiplier; + if (ClonesPower > Log2_32(UnswitchThreshold) || + SiblingsMultiplier > UnswitchThreshold) + CostMultiplier = UnswitchThreshold; + else + CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower), + (int)UnswitchThreshold); + + LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier + << " (siblings " << SiblingsMultiplier << " * clones " + << (1 << ClonesPower) << ")" + << " for unswitch candidate: " << TI << "\n"); + return CostMultiplier; +} + static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB, - ScalarEvolution *SE) { + ScalarEvolution *SE, MemorySSAUpdater *MSSAU) { // Collect all invariant conditions within this loop (as opposed to an inner // loop which would be handled when visiting that inner loop). - SmallVector<std::pair<TerminatorInst *, TinyPtrVector<Value *>>, 4> + SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4> UnswitchCandidates; + + // Whether or not we should also collect guards in the loop. + bool CollectGuards = false; + if (UnswitchGuards) { + auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction( + Intrinsic::getName(Intrinsic::experimental_guard)); + if (GuardDecl && !GuardDecl->use_empty()) + CollectGuards = true; + } + for (auto *BB : L.blocks()) { if (LI.getLoopFor(BB) != &L) continue; + if (CollectGuards) + for (auto &I : *BB) + if (isGuard(&I)) { + auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0); + // TODO: Support AND, OR conditions and partial unswitching. + if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond)) + UnswitchCandidates.push_back({&I, {Cond}}); + } + if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { // We can only consider fully loop-invariant switch conditions as we need // to completely eliminate the switch after unswitching. @@ -2231,6 +2569,19 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI)) return false; + SmallVector<BasicBlock *, 4> ExitBlocks; + L.getUniqueExitBlocks(ExitBlocks); + + // We cannot unswitch if exit blocks contain a cleanuppad instruction as we + // don't know how to split those exit blocks. + // FIXME: We should teach SplitBlock to handle this and remove this + // restriction. + for (auto *ExitBB : ExitBlocks) + if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) { + dbgs() << "Cannot unswitch because of cleanuppad in exit block\n"; + return false; + } + LLVM_DEBUG( dbgs() << "Considering " << UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"); @@ -2288,7 +2639,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, SmallDenseMap<DomTreeNode *, int, 4> DTCostMap; // Given a terminator which might be unswitched, computes the non-duplicated // cost for that terminator. - auto ComputeUnswitchedCost = [&](TerminatorInst &TI, bool FullUnswitch) { + auto ComputeUnswitchedCost = [&](Instruction &TI, bool FullUnswitch) { BasicBlock &BB = *TI.getParent(); SmallPtrSet<BasicBlock *, 4> Visited; @@ -2335,22 +2686,40 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, // Now scale the cost by the number of unique successors minus one. We // subtract one because there is already at least one copy of the entire // loop. This is computing the new cost of unswitching a condition. - assert(Visited.size() > 1 && + // Note that guards always have 2 unique successors that are implicit and + // will be materialized if we decide to unswitch it. + int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size(); + assert(SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!"); - return Cost * (Visited.size() - 1); + return Cost * (SuccessorsCount - 1); }; - TerminatorInst *BestUnswitchTI = nullptr; + Instruction *BestUnswitchTI = nullptr; int BestUnswitchCost; ArrayRef<Value *> BestUnswitchInvariants; for (auto &TerminatorAndInvariants : UnswitchCandidates) { - TerminatorInst &TI = *TerminatorAndInvariants.first; + Instruction &TI = *TerminatorAndInvariants.first; ArrayRef<Value *> Invariants = TerminatorAndInvariants.second; BranchInst *BI = dyn_cast<BranchInst>(&TI); int CandidateCost = ComputeUnswitchedCost( TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 && Invariants[0] == BI->getCondition())); - LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost - << " for unswitch candidate: " << TI << "\n"); + // Calculate cost multiplier which is a tool to limit potentially + // exponential behavior of loop-unswitch. + if (EnableUnswitchCostMultiplier) { + int CostMultiplier = + calculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates); + assert( + (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && + "cost multiplier needs to be in the range of 1..UnswitchThreshold"); + CandidateCost *= CostMultiplier; + LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost + << " (multiplier: " << CostMultiplier << ")" + << " for unswitch candidate: " << TI << "\n"); + } else { + LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost + << " for unswitch candidate: " << TI << "\n"); + } + if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) { BestUnswitchTI = &TI; BestUnswitchCost = CandidateCost; @@ -2364,11 +2733,17 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, return false; } - LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = " + // If the best candidate is a guard, turn it into a branch. + if (isGuard(BestUnswitchTI)) + BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L, + ExitBlocks, DT, LI, MSSAU); + + LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << BestUnswitchCost << ") terminator: " << *BestUnswitchTI << "\n"); - return unswitchNontrivialInvariants( - L, *BestUnswitchTI, BestUnswitchInvariants, DT, LI, AC, UnswitchCB, SE); + unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants, + ExitBlocks, DT, LI, AC, UnswitchCB, SE, MSSAU); + return true; } /// Unswitch control flow predicated on loop invariant conditions. @@ -2380,6 +2755,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, /// /// The `DT`, `LI`, `AC`, `TTI` parameters are required analyses that are also /// updated based on the unswitch. +/// The `MSSA` analysis is also updated if valid (i.e. its use is enabled). /// /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is /// true, we will attempt to do non-trivial unswitching as well as trivial @@ -2395,7 +2771,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, bool NonTrivial, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB, - ScalarEvolution *SE) { + ScalarEvolution *SE, MemorySSAUpdater *MSSAU) { assert(L.isRecursivelyLCSSAForm(DT, LI) && "Loops must be in LCSSA form before unswitching."); bool Changed = false; @@ -2405,7 +2781,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, return false; // Try trivial unswitch first before loop over other basic blocks in the loop. - if (unswitchAllTrivialConditions(L, DT, LI, SE)) { + if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) { // If we unswitched successfully we will want to clean up the loop before // processing it further so just mark it as unswitched and return. UnswitchCB(/*CurrentLoopValid*/ true, {}); @@ -2426,7 +2802,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, // Try to unswitch the best invariant condition. We prefer this full unswitch to // a partial unswitch when possible below the threshold. - if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE)) + if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE, MSSAU)) return true; // No other opportunities to unswitch. @@ -2460,10 +2836,19 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, U.markLoopAsDeleted(L, LoopName); }; + Optional<MemorySSAUpdater> MSSAU; + if (AR.MSSA) { + MSSAU = MemorySSAUpdater(AR.MSSA); + if (VerifyMemorySSA) + AR.MSSA->verifyMemorySSA(); + } if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB, - &AR.SE)) + &AR.SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr)) return PreservedAnalyses::all(); + if (AR.MSSA && VerifyMemorySSA) + AR.MSSA->verifyMemorySSA(); + // Historically this pass has had issues with the dominator tree so verify it // in asserts builds. assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast)); @@ -2489,6 +2874,10 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + if (EnableMSSALoopDependency) { + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + } getLoopAnalysisUsage(AU); } }; @@ -2508,6 +2897,12 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + MemorySSA *MSSA = nullptr; + Optional<MemorySSAUpdater> MSSAU; + if (EnableMSSALoopDependency) { + MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); + MSSAU = MemorySSAUpdater(MSSA); + } auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; @@ -2527,7 +2922,14 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { LPM.markLoopAsDeleted(*L); }; - bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE); + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + + bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE, + MSSAU.hasValue() ? MSSAU.getPointer() : nullptr); + + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); // If anything was unswitched, also clear any cached information about this // loop. @@ -2547,6 +2949,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) |