aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp577
1 files changed, 222 insertions, 355 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index d4cd57405239..a91bf7b7af13 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -59,7 +59,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -220,26 +219,24 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
}
}
- // At this point, the code is well formed. We now do a quick sweep over the
- // inserted code, doing constant propagation and dead code elimination as we
- // go.
+ // At this point, the code is well formed. Perform constprop, instsimplify,
+ // and dce.
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
for (BasicBlock *BB : L->getBlocks()) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
-
if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
if (LI->replacementPreservesLCSSAForm(Inst, V))
Inst->replaceAllUsesWith(V);
if (isInstructionTriviallyDead(Inst))
- BB->getInstList().erase(Inst);
+ DeadInsts.emplace_back(Inst);
}
+ // We can't do recursive deletion until we're done iterating, as we might
+ // have a phi which (potentially indirectly) uses instructions later in
+ // the block we're iterating through.
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
}
-
- // TODO: after peeling or unrolling, previously loop variant conditions are
- // likely to fold to constants, eagerly propagating those here will require
- // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
- // appropriate.
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
@@ -247,32 +244,10 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
-/// TripCount is the upper bound of the iteration on which control exits
-/// LatchBlock. Control may exit the loop prior to TripCount iterations either
-/// via an early branch in other loop block or via LatchBlock terminator. This
-/// is relaxed from the general definition of trip count which is the number of
-/// times the loop header executes. Note that UnrollLoop assumes that the loop
-/// counter test is in LatchBlock in order to remove unnecesssary instances of
-/// the test. If control can exit the loop from the LatchBlock's terminator
-/// prior to TripCount iterations, flag PreserveCondBr needs to be set.
-///
-/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
-/// needs to be preserved. It is needed when we use trip count upper bound to
-/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
-/// conditional branch needs to be preserved.
-///
-/// Similarly, TripMultiple divides the number of times that the LatchBlock may
-/// execute without exiting the loop.
-///
-/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
-/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
-/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
-/// iterations before branching into the unrolled loop. UnrollLoop will not
-/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
-/// AllowExpensiveTripCount is false.
-///
-/// If we want to perform PGO-based loop peeling, PeelCount is set to the
-/// number of iterations we want to peel off.
+/// If Runtime is true then UnrollLoop will try to insert a prologue or
+/// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop
+/// will not runtime-unroll the loop if computing the run-time trip count will
+/// be expensive and AllowExpensiveTripCount is false.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
@@ -287,6 +262,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
const TargetTransformInfo *TTI,
OptimizationRemarkEmitter *ORE,
bool PreserveLCSSA, Loop **RemainderLoop) {
+ assert(DT && "DomTree is required");
if (!L->getLoopPreheader()) {
LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -311,56 +287,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
- if (ULO.TripCount != 0)
- LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n");
- if (ULO.TripMultiple != 1)
- LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n");
-
- // Effectively "DCE" unrolled iterations that are beyond the tripcount
- // and will never be executed.
- if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
- ULO.Count = ULO.TripCount;
-
- // Don't enter the unroll code if there is nothing to do.
- if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
- LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
- return LoopUnrollResult::Unmodified;
- }
-
assert(ULO.Count > 0);
- assert(ULO.TripMultiple > 0);
- assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
-
- // Are we eliminating the loop control altogether?
- bool CompletelyUnroll = ULO.Count == ULO.TripCount;
-
- // We assume a run-time trip count if the compiler cannot
- // figure out the loop trip count and the unroll-runtime
- // flag is specified.
- bool RuntimeTripCount =
- (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
-
- assert((!RuntimeTripCount || !ULO.PeelCount) &&
- "Did not expect runtime trip-count unrolling "
- "and peeling for the same loop");
-
- bool Peeled = false;
- if (ULO.PeelCount) {
- Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
-
- // Successful peeling may result in a change in the loop preheader/trip
- // counts. If we later unroll the loop, we want these to be updated.
- if (Peeled) {
- // According to our guards and profitability checks the only
- // meaningful exit should be latch block. Other exits go to deopt,
- // so we do not worry about them.
- BasicBlock *ExitingBlock = L->getLoopLatch();
- assert(ExitingBlock && "Loop without exiting block?");
- assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
- ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
- ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
- }
- }
// All these values should be taken only after peeling because they might have
// changed.
@@ -371,6 +298,61 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
L->getExitBlocks(ExitBlocks);
std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
+ const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+ const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+
+ // Effectively "DCE" unrolled iterations that are beyond the max tripcount
+ // and will never be executed.
+ if (MaxTripCount && ULO.Count > MaxTripCount)
+ ULO.Count = MaxTripCount;
+
+ struct ExitInfo {
+ unsigned TripCount;
+ unsigned TripMultiple;
+ unsigned BreakoutTrip;
+ bool ExitOnTrue;
+ SmallVector<BasicBlock *> ExitingBlocks;
+ };
+ DenseMap<BasicBlock *, ExitInfo> ExitInfos;
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (auto *ExitingBlock : ExitingBlocks) {
+ // The folding code is not prepared to deal with non-branch instructions
+ // right now.
+ auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI)
+ continue;
+
+ ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second;
+ Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ if (Info.TripCount != 0) {
+ Info.BreakoutTrip = Info.TripCount % ULO.Count;
+ Info.TripMultiple = 0;
+ } else {
+ Info.BreakoutTrip = Info.TripMultiple =
+ (unsigned)GreatestCommonDivisor64(ULO.Count, Info.TripMultiple);
+ }
+ Info.ExitOnTrue = !L->contains(BI->getSuccessor(0));
+ Info.ExitingBlocks.push_back(ExitingBlock);
+ LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName()
+ << ": TripCount=" << Info.TripCount
+ << ", TripMultiple=" << Info.TripMultiple
+ << ", BreakoutTrip=" << Info.BreakoutTrip << "\n");
+ }
+
+ // Are we eliminating the loop control altogether? Note that we can know
+ // we're eliminating the backedge without knowing exactly which iteration
+ // of the unrolled body exits.
+ const bool CompletelyUnroll = ULO.Count == MaxTripCount;
+
+ const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero;
+
+ // There's no point in performing runtime unrolling if this unroll count
+ // results in a full unroll.
+ if (CompletelyUnroll)
+ ULO.Runtime = false;
+
// Go through all exits of L and see if there are any phi-nodes there. We just
// conservatively assume that they're inserted to preserve LCSSA form, which
// means that complete unrolling might break this form. We need to either fix
@@ -392,30 +374,16 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// A conditional branch which exits the loop, which can be optimized to an
// unconditional branch in the unrolled loop in some cases.
- BranchInst *ExitingBI = nullptr;
bool LatchIsExiting = L->isLoopExiting(LatchBlock);
- if (LatchIsExiting)
- ExitingBI = LatchBI;
- else if (BasicBlock *ExitingBlock = L->getExitingBlock())
- ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
- // If the peeling guard is changed this assert may be relaxed or even
- // deleted.
- assert(!Peeled && "Peeling guard changed!");
LLVM_DEBUG(
dbgs() << "Can't unroll; a conditional latch must exit the loop");
return LoopUnrollResult::Unmodified;
}
- LLVM_DEBUG({
- if (ExitingBI)
- dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
- << "\n";
- else
- dbgs() << " No single exiting block\n";
- });
- // Loops containing convergent instructions must have a count that divides
- // their TripMultiple.
+ // Loops containing convergent instructions cannot use runtime unrolling,
+ // as the prologue/epilogue may add additional control-dependencies to
+ // convergent operations.
LLVM_DEBUG(
{
bool HasConvergent = false;
@@ -423,22 +391,21 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
for (auto &I : *BB)
if (auto *CB = dyn_cast<CallBase>(&I))
HasConvergent |= CB->isConvergent();
- assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
- "Unroll count must divide trip multiple if loop contains a "
- "convergent operation.");
+ assert((!HasConvergent || !ULO.Runtime) &&
+ "Can't runtime unroll if loop contains a convergent operation.");
});
bool EpilogProfitability =
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
: isEpilogProfitable(L);
- if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
+ if (ULO.Runtime &&
!UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
EpilogProfitability, ULO.UnrollRemainder,
ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
PreserveLCSSA, RemainderLoop)) {
if (ULO.Force)
- RuntimeTripCount = false;
+ ULO.Runtime = false;
else {
LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
"generated when assuming runtime trip count\n");
@@ -446,71 +413,34 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
- // If we know the trip count, we know the multiple...
- unsigned BreakoutTrip = 0;
- if (ULO.TripCount != 0) {
- BreakoutTrip = ULO.TripCount % ULO.Count;
- ULO.TripMultiple = 0;
- } else {
- // Figure out what multiple to use.
- BreakoutTrip = ULO.TripMultiple =
- (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple);
- }
-
using namespace ore;
// Report the unrolling decision.
if (CompletelyUnroll) {
LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
- << " with trip count " << ULO.TripCount << "!\n");
+ << " with trip count " << ULO.Count << "!\n");
if (ORE)
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
L->getHeader())
<< "completely unrolled loop with "
- << NV("UnrollCount", ULO.TripCount) << " iterations";
- });
- } else if (ULO.PeelCount) {
- LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
- << " with iteration count " << ULO.PeelCount << "!\n");
- if (ORE)
- ORE->emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
- L->getHeader())
- << " peeled loop by " << NV("PeelCount", ULO.PeelCount)
- << " iterations";
+ << NV("UnrollCount", ULO.Count) << " iterations";
});
} else {
- auto DiagBuilder = [&]() {
- OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
- L->getHeader());
- return Diag << "unrolled loop by a factor of "
- << NV("UnrollCount", ULO.Count);
- };
-
LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
<< ULO.Count);
- if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) {
- LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
- if (ORE)
- ORE->emit([&]() {
- return DiagBuilder() << " with a breakout at trip "
- << NV("BreakoutTrip", BreakoutTrip);
- });
- } else if (ULO.TripMultiple != 1) {
- LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch");
- if (ORE)
- ORE->emit([&]() {
- return DiagBuilder()
- << " with " << NV("TripMultiple", ULO.TripMultiple)
- << " trips per branch";
- });
- } else if (RuntimeTripCount) {
+ if (ULO.Runtime)
LLVM_DEBUG(dbgs() << " with run-time trip count");
- if (ORE)
- ORE->emit(
- [&]() { return DiagBuilder() << " with run-time trip count"; });
- }
LLVM_DEBUG(dbgs() << "!\n");
+
+ if (ORE)
+ ORE->emit([&]() {
+ OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
+ L->getHeader());
+ Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count);
+ if (ULO.Runtime)
+ Diag << " with run-time trip count";
+ return Diag;
+ });
}
// We are going to make changes to this loop. SCEV may be keeping cached info
@@ -530,12 +460,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (!LatchIsExiting)
++NumUnrolledNotLatch;
- Optional<bool> ContinueOnTrue = None;
- BasicBlock *LoopExit = nullptr;
- if (ExitingBI) {
- ContinueOnTrue = L->contains(ExitingBI->getSuccessor(0));
- LoopExit = ExitingBI->getSuccessor(*ContinueOnTrue);
- }
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
@@ -546,15 +470,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
std::vector<BasicBlock *> Headers;
- std::vector<BasicBlock *> ExitingBlocks;
- std::vector<BasicBlock *> ExitingSucc;
std::vector<BasicBlock *> Latches;
Headers.push_back(Header);
Latches.push_back(LatchBlock);
- if (ExitingBI) {
- ExitingBlocks.push_back(ExitingBI->getParent());
- ExitingSucc.push_back(ExitingBI->getSuccessor(!(*ContinueOnTrue)));
- }
// The current on-the-fly SSA update requires blocks to be processed in
// reverse postorder so that LastValueMap contains the correct value at each
@@ -576,7 +494,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
for (Loop *SubLoop : *L)
LoopsToSimplify.insert(SubLoop);
- if (Header->getParent()->isDebugInfoForProfiling())
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
@@ -652,12 +572,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// Keep track of the exiting block and its successor block contained in
// the loop for the current iteration.
- if (ExitingBI) {
- if (*BB == ExitingBlocks[0])
- ExitingBlocks.push_back(New);
- if (*BB == ExitingSucc[0])
- ExitingSucc.push_back(New);
- }
+ auto ExitInfoIt = ExitInfos.find(*BB);
+ if (ExitInfoIt != ExitInfos.end())
+ ExitInfoIt->second.ExitingBlocks.push_back(New);
NewBlocks.push_back(New);
UnrolledLoopBlocks.push_back(New);
@@ -666,28 +583,23 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// dedicated entry block (copy of the header block), this header's copy
// dominates all copied blocks. That means, dominance relations in the
// copied body are the same as in the original body.
- if (DT) {
- if (*BB == Header)
- DT->addNewBlock(New, Latches[It - 1]);
- else {
- auto BBDomNode = DT->getNode(*BB);
- auto BBIDom = BBDomNode->getIDom();
- BasicBlock *OriginalBBIDom = BBIDom->getBlock();
- DT->addNewBlock(
- New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
- }
+ if (*BB == Header)
+ DT->addNewBlock(New, Latches[It - 1]);
+ else {
+ auto BBDomNode = DT->getNode(*BB);
+ auto BBIDom = BBDomNode->getIDom();
+ BasicBlock *OriginalBBIDom = BBIDom->getBlock();
+ DT->addNewBlock(
+ New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
}
}
// Remap all instructions in the most recent iteration
remapInstructionsInBlocks(NewBlocks, LastValueMap);
- for (BasicBlock *NewBlock : NewBlocks) {
- for (Instruction &I : *NewBlock) {
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
- }
- }
+ for (BasicBlock *NewBlock : NewBlocks)
+ for (Instruction &I : *NewBlock)
+ if (auto *II = dyn_cast<AssumeInst>(&I))
+ AC->registerAssumption(II);
{
// Identify what other metadata depends on the cloned version. After
@@ -717,116 +629,18 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
- auto setDest = [](BasicBlock *Src, BasicBlock *Dest, BasicBlock *BlockInLoop,
- bool NeedConditional, Optional<bool> ContinueOnTrue,
- bool IsDestLoopExit) {
- auto *Term = cast<BranchInst>(Src->getTerminator());
- if (NeedConditional) {
- // Update the conditional branch's successor for the following
- // iteration.
- assert(ContinueOnTrue.hasValue() &&
- "Expecting valid ContinueOnTrue when NeedConditional is true");
- Term->setSuccessor(!(*ContinueOnTrue), Dest);
- } else {
- // Remove phi operands at this loop exit
- if (!IsDestLoopExit) {
- BasicBlock *BB = Src;
- for (BasicBlock *Succ : successors(BB)) {
- // Preserve the incoming value from BB if we are jumping to the block
- // in the current loop.
- if (Succ == BlockInLoop)
- continue;
- for (PHINode &Phi : Succ->phis())
- Phi.removeIncomingValue(BB, false);
- }
- }
- // Replace the conditional branch with an unconditional one.
- BranchInst::Create(Dest, Term);
- Term->eraseFromParent();
- }
- };
-
// Connect latches of the unrolled iterations to the headers of the next
- // iteration. If the latch is also the exiting block, the conditional branch
- // may have to be preserved.
+ // iteration. Currently they point to the header of the same iteration.
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The branch destination.
unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
- bool NeedConditional = LatchIsExiting;
-
- if (LatchIsExiting) {
- if (RuntimeTripCount && j != 0)
- NeedConditional = false;
-
- // For a complete unroll, make the last iteration end with a branch
- // to the exit block.
- if (CompletelyUnroll) {
- if (j == 0)
- Dest = LoopExit;
- // If using trip count upper bound to completely unroll, we need to
- // keep the conditional branch except the last one because the loop
- // may exit after any iteration.
- assert(NeedConditional &&
- "NeedCondition cannot be modified by both complete "
- "unrolling and runtime unrolling");
- NeedConditional =
- (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
- } else if (j != BreakoutTrip &&
- (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- NeedConditional = false;
- }
- }
-
- setDest(Latches[i], Dest, Headers[i], NeedConditional, ContinueOnTrue,
- Dest == LoopExit);
- }
-
- if (!LatchIsExiting) {
- // If the latch is not exiting, we may be able to simplify the conditional
- // branches in the unrolled exiting blocks.
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- // The branch destination.
- unsigned j = (i + 1) % e;
- bool NeedConditional = true;
-
- if (RuntimeTripCount && j != 0)
- NeedConditional = false;
-
- if (CompletelyUnroll)
- // We cannot drop the conditional branch for the last condition, as we
- // may have to execute the loop body depending on the condition.
- NeedConditional = j == 0 || ULO.PreserveCondBr;
- else if (j != BreakoutTrip &&
- (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- NeedConditional = false;
-
- // Conditional branches from non-latch exiting block have successors
- // either in the same loop iteration or outside the loop. The branches are
- // already correct.
- if (NeedConditional)
- continue;
- setDest(ExitingBlocks[i], ExitingSucc[i], ExitingSucc[i], NeedConditional,
- None, false);
- }
-
- // When completely unrolling, the last latch becomes unreachable.
- if (CompletelyUnroll) {
- BranchInst *Term = cast<BranchInst>(Latches.back()->getTerminator());
- new UnreachableInst(Term->getContext(), Term);
- Term->eraseFromParent();
- }
+ Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]);
}
// Update dominators of blocks we might reach through exits.
// Immediate dominator of such block might change, because we add more
// routes which can lead to the exit: we can now reach it from the copied
// iterations too.
- if (DT && ULO.Count > 1) {
+ if (ULO.Count > 1) {
for (auto *BB : OriginalLoopBlocks) {
auto *BBDomNode = DT->getNode(BB);
SmallVector<BasicBlock *, 16> ChildrenToUpdate;
@@ -835,42 +649,98 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (!L->contains(ChildBB))
ChildrenToUpdate.push_back(ChildBB);
}
- BasicBlock *NewIDom;
- if (ExitingBI && BB == ExitingBlocks[0]) {
- // The latch is special because we emit unconditional branches in
- // some cases where the original loop contained a conditional branch.
- // Since the latch is always at the bottom of the loop, if the latch
- // dominated an exit before unrolling, the new dominator of that exit
- // must also be a latch. Specifically, the dominator is the first
- // latch which ends in a conditional branch, or the last latch if
- // there is no such latch.
- // For loops exiting from non latch exiting block, we limit the
- // branch simplification to single exiting block loops.
- NewIDom = ExitingBlocks.back();
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- Instruction *Term = ExitingBlocks[i]->getTerminator();
- if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
- NewIDom =
- DT->findNearestCommonDominator(ExitingBlocks[i], Latches[i]);
- break;
- }
- }
- } else {
- // The new idom of the block will be the nearest common dominator
- // of all copies of the previous idom. This is equivalent to the
- // nearest common dominator of the previous idom and the first latch,
- // which dominates all copies of the previous idom.
- NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
- }
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
for (auto *ChildBB : ChildrenToUpdate)
DT->changeImmediateDominator(ChildBB, NewIDom);
}
}
- assert(!DT || !UnrollVerifyDomtree ||
+ assert(!UnrollVerifyDomtree ||
DT->verify(DominatorTree::VerificationLevel::Fast));
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) {
+ auto *Term = cast<BranchInst>(Src->getTerminator());
+ const unsigned Idx = ExitOnTrue ^ WillExit;
+ BasicBlock *Dest = Term->getSuccessor(Idx);
+ BasicBlock *DeadSucc = Term->getSuccessor(1-Idx);
+
+ // Remove predecessors from all non-Dest successors.
+ DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true);
+
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
+
+ DTU.applyUpdates({{DominatorTree::Delete, Src, DeadSucc}});
+ };
+
+ auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j,
+ bool IsLatch) -> Optional<bool> {
+ if (CompletelyUnroll) {
+ if (PreserveOnlyFirst) {
+ if (i == 0)
+ return None;
+ return j == 0;
+ }
+ // Complete (but possibly inexact) unrolling
+ if (j == 0)
+ return true;
+ if (Info.TripCount && j != Info.TripCount)
+ return false;
+ return None;
+ }
+
+ if (ULO.Runtime) {
+ // If runtime unrolling inserts a prologue, information about non-latch
+ // exits may be stale.
+ if (IsLatch && j != 0)
+ return false;
+ return None;
+ }
+
+ if (j != Info.BreakoutTrip &&
+ (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) {
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ return false;
+ }
+ return None;
+ };
+
+ // Fold branches for iterations where we know that they will exit or not
+ // exit.
+ for (const auto &Pair : ExitInfos) {
+ const ExitInfo &Info = Pair.second;
+ for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ bool IsLatch = Pair.first == LatchBlock;
+ Optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch);
+ if (!KnownWillExit)
+ continue;
+
+ // We don't fold known-exiting branches for non-latch exits here,
+ // because this ensures that both all loop blocks and all exit blocks
+ // remain reachable in the CFG.
+ // TODO: We could fold these branches, but it would require much more
+ // sophisticated updates to LoopInfo.
+ if (*KnownWillExit && !IsLatch)
+ continue;
+
+ SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue);
+ }
+ }
+
+ // When completely unrolling, the last latch becomes unreachable.
+ if (!LatchIsExiting && CompletelyUnroll)
+ changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA, &DTU);
+
// Merge adjacent basic blocks, if possible.
for (BasicBlock *Latch : Latches) {
BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
@@ -893,8 +763,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
- simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
- SE, DT, AC, TTI);
+ simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
+ TTI);
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
@@ -915,39 +785,36 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
- // If we have a pass and a DominatorTree we should re-simplify impacted loops
- // to ensure subsequent analyses can rely on this form. We want to simplify
+ // Make sure that loop-simplify form is preserved. We want to simplify
// at least one layer outside of the loop that was unrolled so that any
// changes to the parent loop exposed by the unrolling are considered.
- if (DT) {
- if (OuterL) {
- // OuterL includes all loops for which we can break loop-simplify, so
- // it's sufficient to simplify only it (it'll recursively simplify inner
- // loops too).
- if (NeedToFixLCSSA) {
- // LCSSA must be performed on the outermost affected loop. The unrolled
- // loop's last loop latch is guaranteed to be in the outermost loop
- // after LoopInfo's been updated by LoopInfo::erase.
- Loop *LatchLoop = LI->getLoopFor(Latches.back());
- Loop *FixLCSSALoop = OuterL;
- if (!FixLCSSALoop->contains(LatchLoop))
- while (FixLCSSALoop->getParentLoop() != LatchLoop)
- FixLCSSALoop = FixLCSSALoop->getParentLoop();
-
- formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
- } else if (PreserveLCSSA) {
- assert(OuterL->isLCSSAForm(*DT) &&
- "Loops should be in LCSSA form after loop-unroll.");
- }
-
- // TODO: That potentially might be compile-time expensive. We should try
- // to fix the loop-simplified form incrementally.
- simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
- } else {
- // Simplify loops for which we might've broken loop-simplify form.
- for (Loop *SubLoop : LoopsToSimplify)
- simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+ if (OuterL) {
+ // OuterL includes all loops for which we can break loop-simplify, so
+ // it's sufficient to simplify only it (it'll recursively simplify inner
+ // loops too).
+ if (NeedToFixLCSSA) {
+ // LCSSA must be performed on the outermost affected loop. The unrolled
+ // loop's last loop latch is guaranteed to be in the outermost loop
+ // after LoopInfo's been updated by LoopInfo::erase.
+ Loop *LatchLoop = LI->getLoopFor(Latches.back());
+ Loop *FixLCSSALoop = OuterL;
+ if (!FixLCSSALoop->contains(LatchLoop))
+ while (FixLCSSALoop->getParentLoop() != LatchLoop)
+ FixLCSSALoop = FixLCSSALoop->getParentLoop();
+
+ formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
+ } else if (PreserveLCSSA) {
+ assert(OuterL->isLCSSAForm(*DT) &&
+ "Loops should be in LCSSA form after loop-unroll.");
}
+
+ // TODO: That potentially might be compile-time expensive. We should try
+ // to fix the loop-simplified form incrementally.
+ simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+ } else {
+ // Simplify loops for which we might've broken loop-simplify form.
+ for (Loop *SubLoop : LoopsToSimplify)
+ simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
}
return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled