diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopUnswitch.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 321 |
1 files changed, 277 insertions, 44 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 645a89bbd0ff..18717394d384 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -98,6 +99,12 @@ static cl::opt<unsigned> Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden); +static cl::opt<unsigned> + MSSAThreshold("loop-unswitch-memoryssa-threshold", + cl::desc("Max number of memory uses to explore during " + "partial unswitching analysis"), + cl::init(100), cl::Hidden); + namespace { class LUAnalysisCache { @@ -184,6 +191,7 @@ namespace { Loop *CurrentLoop = nullptr; DominatorTree *DT = nullptr; MemorySSA *MSSA = nullptr; + AAResults *AA = nullptr; std::unique_ptr<MemorySSAUpdater> MSSAU; BasicBlock *LoopHeader = nullptr; BasicBlock *LoopPreheader = nullptr; @@ -217,6 +225,10 @@ namespace { /// loop preheaders be inserted into the CFG. /// void getAnalysisUsage(AnalysisUsage &AU) const override { + // Lazy BFI and BPI are marked as preserved here so Loop Unswitching + // can remain part of the same loop pass as LICM + AU.addPreserved<LazyBlockFrequencyInfoPass>(); + AU.addPreserved<LazyBranchProbabilityInfoPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); if (EnableMSSALoopDependency) { @@ -244,19 +256,22 @@ namespace { bool tryTrivialLoopUnswitch(bool &Changed); bool unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI = nullptr); + Instruction *TI = nullptr, + ArrayRef<Instruction *> ToDuplicate = {}); void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock, Instruction *TI); void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, - Instruction *TI); + Instruction *TI, + ArrayRef<Instruction *> ToDuplicate = {}); void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool IsEqual); - void emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, - BasicBlock *FalseDest, - BranchInst *OldBranch, Instruction *TI); + void + emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, + BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef<Instruction *> ToDuplicate = {}); void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L); @@ -523,6 +538,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); LPM = &LPMRef; DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); if (EnableMSSALoopDependency) { MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); @@ -624,6 +640,145 @@ static bool equalityPropUnSafe(Value &LoopCond) { return false; } +/// Check if the loop header has a conditional branch that is not +/// loop-invariant, because it involves load instructions. If all paths from +/// either the true or false successor to the header or loop exists do not +/// modify the memory feeding the condition, perform 'partial unswitching'. That +/// is, duplicate the instructions feeding the condition in the pre-header. Then +/// unswitch on the duplicated condition. The condition is now known in the +/// unswitched version for the 'invariant' path through the original loop. +/// +/// If the branch condition of the header is partially invariant, return a pair +/// containing the instructions to duplicate and a boolean Constant to update +/// the condition in the loops created for the true or false successors. +static std::pair<SmallVector<Instruction *, 4>, Constant *> +hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) { + SmallVector<Instruction *, 4> ToDuplicate; + + auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator()); + if (!TI || !TI->isConditional()) + return {}; + + auto *CondI = dyn_cast<CmpInst>(TI->getCondition()); + // The case with the condition outside the loop should already be handled + // earlier. + if (!CondI || !L->contains(CondI)) + return {}; + + ToDuplicate.push_back(CondI); + + SmallVector<Value *, 4> WorkList; + WorkList.append(CondI->op_begin(), CondI->op_end()); + + SmallVector<MemoryAccess *, 4> AccessesToCheck; + SmallVector<MemoryLocation, 4> AccessedLocs; + while (!WorkList.empty()) { + Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val()); + if (!I || !L->contains(I)) + continue; + + // TODO: support additional instructions. + if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I)) + return {}; + + // Do not duplicate volatile and atomic loads. + if (auto *LI = dyn_cast<LoadInst>(I)) + if (LI->isVolatile() || LI->isAtomic()) + return {}; + + ToDuplicate.push_back(I); + if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) { + if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) { + // Queue the defining access to check for alias checks. + AccessesToCheck.push_back(MemUse->getDefiningAccess()); + AccessedLocs.push_back(MemoryLocation::get(I)); + } else { + // MemoryDefs may clobber the location or may be atomic memory + // operations. Bail out. + return {}; + } + } + WorkList.append(I->op_begin(), I->op_end()); + } + + if (ToDuplicate.size() <= 1) + return {}; + + auto HasNoClobbersOnPath = + [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header, + SmallVector<MemoryAccess *, 4> AccessesToCheck) { + // First, collect all blocks in the loop that are on a patch from Succ + // to the header. + SmallVector<BasicBlock *, 4> WorkList; + WorkList.push_back(Succ); + WorkList.push_back(Header); + SmallPtrSet<BasicBlock *, 4> Seen; + Seen.insert(Header); + while (!WorkList.empty()) { + BasicBlock *Current = WorkList.pop_back_val(); + if (!L->contains(Current)) + continue; + const auto &SeenIns = Seen.insert(Current); + if (!SeenIns.second) + continue; + + WorkList.append(succ_begin(Current), succ_end(Current)); + } + + // Require at least 2 blocks on a path through the loop. This skips + // paths that directly exit the loop. + if (Seen.size() < 2) + return false; + + // Next, check if there are any MemoryDefs that are on the path through + // the loop (in the Seen set) and they may-alias any of the locations in + // AccessedLocs. If that is the case, they may modify the condition and + // partial unswitching is not possible. + SmallPtrSet<MemoryAccess *, 4> SeenAccesses; + while (!AccessesToCheck.empty()) { + MemoryAccess *Current = AccessesToCheck.pop_back_val(); + auto SeenI = SeenAccesses.insert(Current); + if (!SeenI.second || !Seen.contains(Current->getBlock())) + continue; + + // Bail out if exceeded the threshold. + if (SeenAccesses.size() >= MSSAThreshold) + return false; + + // MemoryUse are read-only accesses. + if (isa<MemoryUse>(Current)) + continue; + + // For a MemoryDef, check if is aliases any of the location feeding + // the original condition. + if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) { + if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) { + return isModSet( + AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc)); + })) + return false; + } + + for (Use &U : Current->uses()) + AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser())); + } + + return true; + }; + + // If we branch to the same successor, partial unswitching will not be + // beneficial. + if (TI->getSuccessor(0) == TI->getSuccessor(1)) + return {}; + + if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck)) + return {ToDuplicate, ConstantInt::getTrue(TI->getContext())}; + if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck)) + return {ToDuplicate, ConstantInt::getFalse(TI->getContext())}; + + return {}; +} + /// Do actual work and unswitch loop if possible and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; @@ -661,7 +816,7 @@ bool LoopUnswitch::processCurrentLoop() { // FIXME: Use Function::hasOptSize(). if (OptimizeForSize || LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) - return false; + return Changed; // Run through the instructions in the loop, keeping track of three things: // @@ -685,10 +840,10 @@ bool LoopUnswitch::processCurrentLoop() { if (!CB) continue; if (CB->isConvergent()) - return false; + return Changed; if (auto *II = dyn_cast<InvokeInst>(&I)) if (!II->getUnwindDest()->canSplitPredecessors()) - return false; + return Changed; if (auto *II = dyn_cast<IntrinsicInst>(&I)) if (II->getIntrinsicID() == Intrinsic::experimental_guard) Guards.push_back(II); @@ -823,6 +978,28 @@ bool LoopUnswitch::processCurrentLoop() { } } } + + // Check if there is a header condition that is invariant along the patch from + // either the true or false successors to the header. This allows unswitching + // conditions depending on memory accesses, if there's a path not clobbering + // the memory locations. Check if this transform has been disabled using + // metadata, to avoid unswitching the same loop multiple times. + if (MSSA && + !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) { + auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA); + if (!ToDuplicate.first.empty()) { + LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition " + << *ToDuplicate.first[0] << "\n"); + ++NumBranches; + unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second, + CurrentLoop->getHeader()->getTerminator(), + ToDuplicate.first); + + RedoLoop = false; + return true; + } + } + return Changed; } @@ -880,7 +1057,8 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { /// simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI) { + Instruction *TI, + ArrayRef<Instruction *> ToDuplicate) { // Check to see if it would be profitable to unswitch current loop. if (!BranchesInfo.costAllowsUnswitching()) { LLVM_DEBUG(dbgs() << "NOT unswitching loop %" @@ -900,31 +1078,65 @@ bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, return false; } - unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI); + unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate); return true; } /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, /// otherwise branch to FalseDest. Insert the code immediately before OldBranch /// and remove (but not erase!) it from the function. -void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, - BasicBlock *FalseDest, - BranchInst *OldBranch, - Instruction *TI) { +void LoopUnswitch::emitPreheaderBranchOnCondition( + Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef<Instruction *> ToDuplicate) { assert(OldBranch->isUnconditional() && "Preheader is not split correctly"); assert(TrueDest != FalseDest && "Branch targets should be different"); + // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. Value *BranchVal = LIC; bool Swapped = false; - if (!isa<ConstantInt>(Val) || - Val->getType() != Type::getInt1Ty(LIC->getContext())) - BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); - else if (Val != ConstantInt::getTrue(Val->getContext())) { - // We want to enter the new loop when the condition is true. - std::swap(TrueDest, FalseDest); - Swapped = true; + + if (!ToDuplicate.empty()) { + ValueToValueMapTy Old2New; + for (Instruction *I : reverse(ToDuplicate)) { + auto *New = I->clone(); + New->insertBefore(OldBranch); + RemapInstruction(New, Old2New, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + Old2New[I] = New; + + if (MSSAU) { + MemorySSA *MSSA = MSSAU->getMemorySSA(); + auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I)); + if (!MemA) + continue; + + Loop *L = LI->getLoopFor(I->getParent()); + auto *DefiningAccess = MemA->getDefiningAccess(); + // If the defining access is a MemoryPhi in the header, get the incoming + // value for the pre-header as defining access. + if (DefiningAccess->getBlock() == I->getParent()) { + if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) { + DefiningAccess = + MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); + } + } + MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), + MemorySSA::BeforeTerminator); + } + } + BranchVal = Old2New[ToDuplicate[0]]; + } else { + + if (!isa<ConstantInt>(Val) || + Val->getType() != Type::getInt1Ty(LIC->getContext())) + BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); + else if (Val != ConstantInt::getTrue(Val->getContext())) { + // We want to enter the new loop when the condition is true. + std::swap(TrueDest, FalseDest); + Swapped = true; + } } // Old branch will be removed, so save its parent and successor to update the @@ -955,10 +1167,11 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) { Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc}); } - DT->applyUpdates(Updates); if (MSSAU) - MSSAU->applyUpdates(Updates, *DT); + MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); + else + DT->applyUpdates(Updates); } // If either edge is critical, split it. This helps preserve LoopSimplify @@ -1207,8 +1420,9 @@ void LoopUnswitch::splitExitEdges( /// We determined that the loop is profitable to unswitch when LIC equal Val. /// Split it into loop versions and test the condition outside of either loop. /// Return the loops created as Out1/Out2. -void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, - Loop *L, Instruction *TI) { +void LoopUnswitch::unswitchNontrivialCondition( + Value *LIC, Constant *Val, Loop *L, Instruction *TI, + ArrayRef<Instruction *> ToDuplicate) { Function *F = LoopHeader->getParent(); LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << LoopHeader->getName() << " [" << L->getBlocks().size() @@ -1233,7 +1447,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, LoopBlocks.push_back(NewPreheader); // We want the loop to come after the preheader, but before the exit blocks. - LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); + llvm::append_range(LoopBlocks, L->blocks()); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -1247,7 +1461,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, L->getUniqueExitBlocks(ExitBlocks); // Add exit blocks to the loop blocks. - LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); + llvm::append_range(LoopBlocks, ExitBlocks); // Next step, clone all of the basic blocks that make up the loop (including // the loop preheader and exit blocks), keeping track of the mapping between @@ -1340,7 +1554,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, // Emit the new branch that selects between the two versions of this loop. emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, - TI); + TI, ToDuplicate); if (MSSAU) { // Update MemoryPhis in Exit blocks. MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT); @@ -1362,17 +1576,38 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, // iteration. WeakTrackingVH LICHandle(LIC); - // Now we rewrite the original code to know that the condition is true and the - // new code to know that the condition is false. - rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); - - // It's possible that simplifying one loop could cause the other to be - // changed to another value or a constant. If its a constant, don't simplify - // it. - if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && - LICHandle && !isa<Constant>(LICHandle)) - rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, - /*IsEqual=*/true); + if (ToDuplicate.empty()) { + // Now we rewrite the original code to know that the condition is true and + // the new code to know that the condition is false. + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); + + // It's possible that simplifying one loop could cause the other to be + // changed to another value or a constant. If its a constant, don't + // simplify it. + if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && + LICHandle && !isa<Constant>(LICHandle)) + rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, + /*IsEqual=*/true); + } else { + // Partial unswitching. Update the condition in the right loop with the + // constant. + auto *CC = cast<ConstantInt>(Val); + if (CC->isOneValue()) { + rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, + /*IsEqual=*/true); + } else + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); + + // Mark the new loop as partially unswitched, to avoid unswitching on the + // same condition again. + auto &Context = NewLoop->getHeader()->getContext(); + MDNode *DisableUnswitchMD = MDNode::get( + Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable")); + MDNode *NewLoopID = makePostTransformationMetadata( + Context, L->getLoopID(), {"llvm.loop.unswitch.partial"}, + {DisableUnswitchMD}); + NewLoop->setLoopID(NewLoopID); + } if (MSSA && VerifyMemorySSA) MSSA->verifyMemorySSA(); @@ -1381,9 +1616,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, /// Remove all instances of I from the worklist vector specified. static void removeFromWorklist(Instruction *I, std::vector<Instruction *> &Worklist) { - - Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I), - Worklist.end()); + llvm::erase_value(Worklist, I); } /// When we find that I really equals V, remove I from the |