diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 577 |
1 files changed, 222 insertions, 355 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp index d4cd57405239..a91bf7b7af13 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -59,7 +59,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -220,26 +219,24 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, } } - // At this point, the code is well formed. We now do a quick sweep over the - // inserted code, doing constant propagation and dead code elimination as we - // go. + // At this point, the code is well formed. Perform constprop, instsimplify, + // and dce. const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + SmallVector<WeakTrackingVH, 16> DeadInsts; for (BasicBlock *BB : L->getBlocks()) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { Instruction *Inst = &*I++; - if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) if (LI->replacementPreservesLCSSAForm(Inst, V)) Inst->replaceAllUsesWith(V); if (isInstructionTriviallyDead(Inst)) - BB->getInstList().erase(Inst); + DeadInsts.emplace_back(Inst); } + // We can't do recursive deletion until we're done iterating, as we might + // have a phi which (potentially indirectly) uses instructions later in + // the block we're iterating through. + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); } - - // TODO: after peeling or unrolling, previously loop variant conditions are - // likely to fold to constants, eagerly propagating those here will require - // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be - // appropriate. } /// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling @@ -247,32 +244,10 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// -/// TripCount is the upper bound of the iteration on which control exits -/// LatchBlock. Control may exit the loop prior to TripCount iterations either -/// via an early branch in other loop block or via LatchBlock terminator. This -/// is relaxed from the general definition of trip count which is the number of -/// times the loop header executes. Note that UnrollLoop assumes that the loop -/// counter test is in LatchBlock in order to remove unnecesssary instances of -/// the test. If control can exit the loop from the LatchBlock's terminator -/// prior to TripCount iterations, flag PreserveCondBr needs to be set. -/// -/// PreserveCondBr indicates whether the conditional branch of the LatchBlock -/// needs to be preserved. It is needed when we use trip count upper bound to -/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first -/// conditional branch needs to be preserved. -/// -/// Similarly, TripMultiple divides the number of times that the LatchBlock may -/// execute without exiting the loop. -/// -/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that -/// have a runtime (i.e. not compile time constant) trip count. Unrolling these -/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count" -/// iterations before branching into the unrolled loop. UnrollLoop will not -/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and -/// AllowExpensiveTripCount is false. -/// -/// If we want to perform PGO-based loop peeling, PeelCount is set to the -/// number of iterations we want to peel off. +/// If Runtime is true then UnrollLoop will try to insert a prologue or +/// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop +/// will not runtime-unroll the loop if computing the run-time trip count will +/// be expensive and AllowExpensiveTripCount is false. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// @@ -287,6 +262,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) { + assert(DT && "DomTree is required"); if (!L->getLoopPreheader()) { LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -311,56 +287,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, return LoopUnrollResult::Unmodified; } - if (ULO.TripCount != 0) - LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n"); - if (ULO.TripMultiple != 1) - LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n"); - - // Effectively "DCE" unrolled iterations that are beyond the tripcount - // and will never be executed. - if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount) - ULO.Count = ULO.TripCount; - - // Don't enter the unroll code if there is nothing to do. - if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) { - LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); - return LoopUnrollResult::Unmodified; - } - assert(ULO.Count > 0); - assert(ULO.TripMultiple > 0); - assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0); - - // Are we eliminating the loop control altogether? - bool CompletelyUnroll = ULO.Count == ULO.TripCount; - - // We assume a run-time trip count if the compiler cannot - // figure out the loop trip count and the unroll-runtime - // flag is specified. - bool RuntimeTripCount = - (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime); - - assert((!RuntimeTripCount || !ULO.PeelCount) && - "Did not expect runtime trip-count unrolling " - "and peeling for the same loop"); - - bool Peeled = false; - if (ULO.PeelCount) { - Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA); - - // Successful peeling may result in a change in the loop preheader/trip - // counts. If we later unroll the loop, we want these to be updated. - if (Peeled) { - // According to our guards and profitability checks the only - // meaningful exit should be latch block. Other exits go to deopt, - // so we do not worry about them. - BasicBlock *ExitingBlock = L->getLoopLatch(); - assert(ExitingBlock && "Loop without exiting block?"); - assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?"); - ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); - ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); - } - } // All these values should be taken only after peeling because they might have // changed. @@ -371,6 +298,61 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, L->getExitBlocks(ExitBlocks); std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks(); + const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L); + const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); + + // Effectively "DCE" unrolled iterations that are beyond the max tripcount + // and will never be executed. + if (MaxTripCount && ULO.Count > MaxTripCount) + ULO.Count = MaxTripCount; + + struct ExitInfo { + unsigned TripCount; + unsigned TripMultiple; + unsigned BreakoutTrip; + bool ExitOnTrue; + SmallVector<BasicBlock *> ExitingBlocks; + }; + DenseMap<BasicBlock *, ExitInfo> ExitInfos; + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (auto *ExitingBlock : ExitingBlocks) { + // The folding code is not prepared to deal with non-branch instructions + // right now. + auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!BI) + continue; + + ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second; + Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); + Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); + if (Info.TripCount != 0) { + Info.BreakoutTrip = Info.TripCount % ULO.Count; + Info.TripMultiple = 0; + } else { + Info.BreakoutTrip = Info.TripMultiple = + (unsigned)GreatestCommonDivisor64(ULO.Count, Info.TripMultiple); + } + Info.ExitOnTrue = !L->contains(BI->getSuccessor(0)); + Info.ExitingBlocks.push_back(ExitingBlock); + LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName() + << ": TripCount=" << Info.TripCount + << ", TripMultiple=" << Info.TripMultiple + << ", BreakoutTrip=" << Info.BreakoutTrip << "\n"); + } + + // Are we eliminating the loop control altogether? Note that we can know + // we're eliminating the backedge without knowing exactly which iteration + // of the unrolled body exits. + const bool CompletelyUnroll = ULO.Count == MaxTripCount; + + const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero; + + // There's no point in performing runtime unrolling if this unroll count + // results in a full unroll. + if (CompletelyUnroll) + ULO.Runtime = false; + // Go through all exits of L and see if there are any phi-nodes there. We just // conservatively assume that they're inserted to preserve LCSSA form, which // means that complete unrolling might break this form. We need to either fix @@ -392,30 +374,16 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // A conditional branch which exits the loop, which can be optimized to an // unconditional branch in the unrolled loop in some cases. - BranchInst *ExitingBI = nullptr; bool LatchIsExiting = L->isLoopExiting(LatchBlock); - if (LatchIsExiting) - ExitingBI = LatchBI; - else if (BasicBlock *ExitingBlock = L->getExitingBlock()) - ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { - // If the peeling guard is changed this assert may be relaxed or even - // deleted. - assert(!Peeled && "Peeling guard changed!"); LLVM_DEBUG( dbgs() << "Can't unroll; a conditional latch must exit the loop"); return LoopUnrollResult::Unmodified; } - LLVM_DEBUG({ - if (ExitingBI) - dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName() - << "\n"; - else - dbgs() << " No single exiting block\n"; - }); - // Loops containing convergent instructions must have a count that divides - // their TripMultiple. + // Loops containing convergent instructions cannot use runtime unrolling, + // as the prologue/epilogue may add additional control-dependencies to + // convergent operations. LLVM_DEBUG( { bool HasConvergent = false; @@ -423,22 +391,21 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, for (auto &I : *BB) if (auto *CB = dyn_cast<CallBase>(&I)) HasConvergent |= CB->isConvergent(); - assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) && - "Unroll count must divide trip multiple if loop contains a " - "convergent operation."); + assert((!HasConvergent || !ULO.Runtime) && + "Can't runtime unroll if loop contains a convergent operation."); }); bool EpilogProfitability = UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog : isEpilogProfitable(L); - if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 && + if (ULO.Runtime && !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability, ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, PreserveLCSSA, RemainderLoop)) { if (ULO.Force) - RuntimeTripCount = false; + ULO.Runtime = false; else { LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " "generated when assuming runtime trip count\n"); @@ -446,71 +413,34 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } - // If we know the trip count, we know the multiple... - unsigned BreakoutTrip = 0; - if (ULO.TripCount != 0) { - BreakoutTrip = ULO.TripCount % ULO.Count; - ULO.TripMultiple = 0; - } else { - // Figure out what multiple to use. - BreakoutTrip = ULO.TripMultiple = - (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple); - } - using namespace ore; // Report the unrolling decision. if (CompletelyUnroll) { LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() - << " with trip count " << ULO.TripCount << "!\n"); + << " with trip count " << ULO.Count << "!\n"); if (ORE) ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), L->getHeader()) << "completely unrolled loop with " - << NV("UnrollCount", ULO.TripCount) << " iterations"; - }); - } else if (ULO.PeelCount) { - LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName() - << " with iteration count " << ULO.PeelCount << "!\n"); - if (ORE) - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), - L->getHeader()) - << " peeled loop by " << NV("PeelCount", ULO.PeelCount) - << " iterations"; + << NV("UnrollCount", ULO.Count) << " iterations"; }); } else { - auto DiagBuilder = [&]() { - OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), - L->getHeader()); - return Diag << "unrolled loop by a factor of " - << NV("UnrollCount", ULO.Count); - }; - LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << ULO.Count); - if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) { - LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); - if (ORE) - ORE->emit([&]() { - return DiagBuilder() << " with a breakout at trip " - << NV("BreakoutTrip", BreakoutTrip); - }); - } else if (ULO.TripMultiple != 1) { - LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch"); - if (ORE) - ORE->emit([&]() { - return DiagBuilder() - << " with " << NV("TripMultiple", ULO.TripMultiple) - << " trips per branch"; - }); - } else if (RuntimeTripCount) { + if (ULO.Runtime) LLVM_DEBUG(dbgs() << " with run-time trip count"); - if (ORE) - ORE->emit( - [&]() { return DiagBuilder() << " with run-time trip count"; }); - } LLVM_DEBUG(dbgs() << "!\n"); + + if (ORE) + ORE->emit([&]() { + OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), + L->getHeader()); + Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count); + if (ULO.Runtime) + Diag << " with run-time trip count"; + return Diag; + }); } // We are going to make changes to this loop. SCEV may be keeping cached info @@ -530,12 +460,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (!LatchIsExiting) ++NumUnrolledNotLatch; - Optional<bool> ContinueOnTrue = None; - BasicBlock *LoopExit = nullptr; - if (ExitingBI) { - ContinueOnTrue = L->contains(ExitingBI->getSuccessor(0)); - LoopExit = ExitingBI->getSuccessor(*ContinueOnTrue); - } // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. @@ -546,15 +470,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } std::vector<BasicBlock *> Headers; - std::vector<BasicBlock *> ExitingBlocks; - std::vector<BasicBlock *> ExitingSucc; std::vector<BasicBlock *> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); - if (ExitingBI) { - ExitingBlocks.push_back(ExitingBI->getParent()); - ExitingSucc.push_back(ExitingBI->getSuccessor(!(*ContinueOnTrue))); - } // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each @@ -576,7 +494,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, for (Loop *SubLoop : *L) LoopsToSimplify.insert(SubLoop); - if (Header->getParent()->isDebugInfoForProfiling()) + // When a FSDiscriminator is enabled, we don't need to add the multiply + // factors to the discriminators. + if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) if (!isa<DbgInfoIntrinsic>(&I)) @@ -652,12 +572,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // Keep track of the exiting block and its successor block contained in // the loop for the current iteration. - if (ExitingBI) { - if (*BB == ExitingBlocks[0]) - ExitingBlocks.push_back(New); - if (*BB == ExitingSucc[0]) - ExitingSucc.push_back(New); - } + auto ExitInfoIt = ExitInfos.find(*BB); + if (ExitInfoIt != ExitInfos.end()) + ExitInfoIt->second.ExitingBlocks.push_back(New); NewBlocks.push_back(New); UnrolledLoopBlocks.push_back(New); @@ -666,28 +583,23 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // dedicated entry block (copy of the header block), this header's copy // dominates all copied blocks. That means, dominance relations in the // copied body are the same as in the original body. - if (DT) { - if (*BB == Header) - DT->addNewBlock(New, Latches[It - 1]); - else { - auto BBDomNode = DT->getNode(*BB); - auto BBIDom = BBDomNode->getIDom(); - BasicBlock *OriginalBBIDom = BBIDom->getBlock(); - DT->addNewBlock( - New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); - } + if (*BB == Header) + DT->addNewBlock(New, Latches[It - 1]); + else { + auto BBDomNode = DT->getNode(*BB); + auto BBIDom = BBDomNode->getIDom(); + BasicBlock *OriginalBBIDom = BBIDom->getBlock(); + DT->addNewBlock( + New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); } } // Remap all instructions in the most recent iteration remapInstructionsInBlocks(NewBlocks, LastValueMap); - for (BasicBlock *NewBlock : NewBlocks) { - for (Instruction &I : *NewBlock) { - if (auto *II = dyn_cast<IntrinsicInst>(&I)) - if (II->getIntrinsicID() == Intrinsic::assume) - AC->registerAssumption(II); - } - } + for (BasicBlock *NewBlock : NewBlocks) + for (Instruction &I : *NewBlock) + if (auto *II = dyn_cast<AssumeInst>(&I)) + AC->registerAssumption(II); { // Identify what other metadata depends on the cloned version. After @@ -717,116 +629,18 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } - auto setDest = [](BasicBlock *Src, BasicBlock *Dest, BasicBlock *BlockInLoop, - bool NeedConditional, Optional<bool> ContinueOnTrue, - bool IsDestLoopExit) { - auto *Term = cast<BranchInst>(Src->getTerminator()); - if (NeedConditional) { - // Update the conditional branch's successor for the following - // iteration. - assert(ContinueOnTrue.hasValue() && - "Expecting valid ContinueOnTrue when NeedConditional is true"); - Term->setSuccessor(!(*ContinueOnTrue), Dest); - } else { - // Remove phi operands at this loop exit - if (!IsDestLoopExit) { - BasicBlock *BB = Src; - for (BasicBlock *Succ : successors(BB)) { - // Preserve the incoming value from BB if we are jumping to the block - // in the current loop. - if (Succ == BlockInLoop) - continue; - for (PHINode &Phi : Succ->phis()) - Phi.removeIncomingValue(BB, false); - } - } - // Replace the conditional branch with an unconditional one. - BranchInst::Create(Dest, Term); - Term->eraseFromParent(); - } - }; - // Connect latches of the unrolled iterations to the headers of the next - // iteration. If the latch is also the exiting block, the conditional branch - // may have to be preserved. + // iteration. Currently they point to the header of the same iteration. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { - // The branch destination. unsigned j = (i + 1) % e; - BasicBlock *Dest = Headers[j]; - bool NeedConditional = LatchIsExiting; - - if (LatchIsExiting) { - if (RuntimeTripCount && j != 0) - NeedConditional = false; - - // For a complete unroll, make the last iteration end with a branch - // to the exit block. - if (CompletelyUnroll) { - if (j == 0) - Dest = LoopExit; - // If using trip count upper bound to completely unroll, we need to - // keep the conditional branch except the last one because the loop - // may exit after any iteration. - assert(NeedConditional && - "NeedCondition cannot be modified by both complete " - "unrolling and runtime unrolling"); - NeedConditional = - (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0)); - } else if (j != BreakoutTrip && - (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) { - // If we know the trip count or a multiple of it, we can safely use an - // unconditional branch for some iterations. - NeedConditional = false; - } - } - - setDest(Latches[i], Dest, Headers[i], NeedConditional, ContinueOnTrue, - Dest == LoopExit); - } - - if (!LatchIsExiting) { - // If the latch is not exiting, we may be able to simplify the conditional - // branches in the unrolled exiting blocks. - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - // The branch destination. - unsigned j = (i + 1) % e; - bool NeedConditional = true; - - if (RuntimeTripCount && j != 0) - NeedConditional = false; - - if (CompletelyUnroll) - // We cannot drop the conditional branch for the last condition, as we - // may have to execute the loop body depending on the condition. - NeedConditional = j == 0 || ULO.PreserveCondBr; - else if (j != BreakoutTrip && - (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) - // If we know the trip count or a multiple of it, we can safely use an - // unconditional branch for some iterations. - NeedConditional = false; - - // Conditional branches from non-latch exiting block have successors - // either in the same loop iteration or outside the loop. The branches are - // already correct. - if (NeedConditional) - continue; - setDest(ExitingBlocks[i], ExitingSucc[i], ExitingSucc[i], NeedConditional, - None, false); - } - - // When completely unrolling, the last latch becomes unreachable. - if (CompletelyUnroll) { - BranchInst *Term = cast<BranchInst>(Latches.back()->getTerminator()); - new UnreachableInst(Term->getContext(), Term); - Term->eraseFromParent(); - } + Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]); } // Update dominators of blocks we might reach through exits. // Immediate dominator of such block might change, because we add more // routes which can lead to the exit: we can now reach it from the copied // iterations too. - if (DT && ULO.Count > 1) { + if (ULO.Count > 1) { for (auto *BB : OriginalLoopBlocks) { auto *BBDomNode = DT->getNode(BB); SmallVector<BasicBlock *, 16> ChildrenToUpdate; @@ -835,42 +649,98 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (!L->contains(ChildBB)) ChildrenToUpdate.push_back(ChildBB); } - BasicBlock *NewIDom; - if (ExitingBI && BB == ExitingBlocks[0]) { - // The latch is special because we emit unconditional branches in - // some cases where the original loop contained a conditional branch. - // Since the latch is always at the bottom of the loop, if the latch - // dominated an exit before unrolling, the new dominator of that exit - // must also be a latch. Specifically, the dominator is the first - // latch which ends in a conditional branch, or the last latch if - // there is no such latch. - // For loops exiting from non latch exiting block, we limit the - // branch simplification to single exiting block loops. - NewIDom = ExitingBlocks.back(); - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - Instruction *Term = ExitingBlocks[i]->getTerminator(); - if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) { - NewIDom = - DT->findNearestCommonDominator(ExitingBlocks[i], Latches[i]); - break; - } - } - } else { - // The new idom of the block will be the nearest common dominator - // of all copies of the previous idom. This is equivalent to the - // nearest common dominator of the previous idom and the first latch, - // which dominates all copies of the previous idom. - NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); - } + // The new idom of the block will be the nearest common dominator + // of all copies of the previous idom. This is equivalent to the + // nearest common dominator of the previous idom and the first latch, + // which dominates all copies of the previous idom. + BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); for (auto *ChildBB : ChildrenToUpdate) DT->changeImmediateDominator(ChildBB, NewIDom); } } - assert(!DT || !UnrollVerifyDomtree || + assert(!UnrollVerifyDomtree || DT->verify(DominatorTree::VerificationLevel::Fast)); DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + + auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) { + auto *Term = cast<BranchInst>(Src->getTerminator()); + const unsigned Idx = ExitOnTrue ^ WillExit; + BasicBlock *Dest = Term->getSuccessor(Idx); + BasicBlock *DeadSucc = Term->getSuccessor(1-Idx); + + // Remove predecessors from all non-Dest successors. + DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true); + + // Replace the conditional branch with an unconditional one. + BranchInst::Create(Dest, Term); + Term->eraseFromParent(); + + DTU.applyUpdates({{DominatorTree::Delete, Src, DeadSucc}}); + }; + + auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j, + bool IsLatch) -> Optional<bool> { + if (CompletelyUnroll) { + if (PreserveOnlyFirst) { + if (i == 0) + return None; + return j == 0; + } + // Complete (but possibly inexact) unrolling + if (j == 0) + return true; + if (Info.TripCount && j != Info.TripCount) + return false; + return None; + } + + if (ULO.Runtime) { + // If runtime unrolling inserts a prologue, information about non-latch + // exits may be stale. + if (IsLatch && j != 0) + return false; + return None; + } + + if (j != Info.BreakoutTrip && + (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) { + // If we know the trip count or a multiple of it, we can safely use an + // unconditional branch for some iterations. + return false; + } + return None; + }; + + // Fold branches for iterations where we know that they will exit or not + // exit. + for (const auto &Pair : ExitInfos) { + const ExitInfo &Info = Pair.second; + for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) { + // The branch destination. + unsigned j = (i + 1) % e; + bool IsLatch = Pair.first == LatchBlock; + Optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch); + if (!KnownWillExit) + continue; + + // We don't fold known-exiting branches for non-latch exits here, + // because this ensures that both all loop blocks and all exit blocks + // remain reachable in the CFG. + // TODO: We could fold these branches, but it would require much more + // sophisticated updates to LoopInfo. + if (*KnownWillExit && !IsLatch) + continue; + + SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue); + } + } + + // When completely unrolling, the last latch becomes unreachable. + if (!LatchIsExiting && CompletelyUnroll) + changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA, &DTU); + // Merge adjacent basic blocks, if possible. for (BasicBlock *Latch : Latches) { BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator()); @@ -893,8 +763,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. - simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI, - SE, DT, AC, TTI); + simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC, + TTI); NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; @@ -915,39 +785,36 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA) NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI); - // If we have a pass and a DominatorTree we should re-simplify impacted loops - // to ensure subsequent analyses can rely on this form. We want to simplify + // Make sure that loop-simplify form is preserved. We want to simplify // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. - if (DT) { - if (OuterL) { - // OuterL includes all loops for which we can break loop-simplify, so - // it's sufficient to simplify only it (it'll recursively simplify inner - // loops too). - if (NeedToFixLCSSA) { - // LCSSA must be performed on the outermost affected loop. The unrolled - // loop's last loop latch is guaranteed to be in the outermost loop - // after LoopInfo's been updated by LoopInfo::erase. - Loop *LatchLoop = LI->getLoopFor(Latches.back()); - Loop *FixLCSSALoop = OuterL; - if (!FixLCSSALoop->contains(LatchLoop)) - while (FixLCSSALoop->getParentLoop() != LatchLoop) - FixLCSSALoop = FixLCSSALoop->getParentLoop(); - - formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE); - } else if (PreserveLCSSA) { - assert(OuterL->isLCSSAForm(*DT) && - "Loops should be in LCSSA form after loop-unroll."); - } - - // TODO: That potentially might be compile-time expensive. We should try - // to fix the loop-simplified form incrementally. - simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA); - } else { - // Simplify loops for which we might've broken loop-simplify form. - for (Loop *SubLoop : LoopsToSimplify) - simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); + if (OuterL) { + // OuterL includes all loops for which we can break loop-simplify, so + // it's sufficient to simplify only it (it'll recursively simplify inner + // loops too). + if (NeedToFixLCSSA) { + // LCSSA must be performed on the outermost affected loop. The unrolled + // loop's last loop latch is guaranteed to be in the outermost loop + // after LoopInfo's been updated by LoopInfo::erase. + Loop *LatchLoop = LI->getLoopFor(Latches.back()); + Loop *FixLCSSALoop = OuterL; + if (!FixLCSSALoop->contains(LatchLoop)) + while (FixLCSSALoop->getParentLoop() != LatchLoop) + FixLCSSALoop = FixLCSSALoop->getParentLoop(); + + formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE); + } else if (PreserveLCSSA) { + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); } + + // TODO: That potentially might be compile-time expensive. We should try + // to fix the loop-simplified form incrementally. + simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA); + } else { + // Simplify loops for which we might've broken loop-simplify form. + for (Loop *SubLoop : LoopsToSimplify) + simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); } return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled |
