aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/Utils/LoopUnrollPeel.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-08-20 20:50:12 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-08-20 20:50:12 +0000
commite6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch)
tree599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Transforms/Utils/LoopUnrollPeel.cpp
parent1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff)
Diffstat (limited to 'lib/Transforms/Utils/LoopUnrollPeel.cpp')
-rw-r--r--lib/Transforms/Utils/LoopUnrollPeel.cpp210
1 files changed, 136 insertions, 74 deletions
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 151a285af4e9..005306cf1898 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -1,9 +1,8 @@
//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,6 +61,10 @@ static cl::opt<unsigned> UnrollForcePeelCount(
"unroll-force-peel-count", cl::init(0), cl::Hidden,
cl::desc("Force a peel count regardless of profiling information."));
+static cl::opt<bool> UnrollPeelMultiDeoptExit(
+ "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
+ cl::desc("Allow peeling of loops with multiple deopt exits."));
+
// Designates that a Phi is estimated to become invariant after an "infinite"
// number of loop iterations (i.e. only may become an invariant if the loop is
// fully unrolled).
@@ -74,6 +77,22 @@ bool llvm::canPeel(Loop *L) {
if (!L->isLoopSimplifyForm())
return false;
+ if (UnrollPeelMultiDeoptExit) {
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueNonLatchExitBlocks(Exits);
+
+ if (!Exits.empty()) {
+ // Latch's terminator is a conditional branch, Latch is exiting and
+ // all non Latch exits ends up with deoptimize.
+ const BasicBlock *Latch = L->getLoopLatch();
+ const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
+ return T && T->isConditional() && L->isLoopExiting(Latch) &&
+ all_of(Exits, [](const BasicBlock *BB) {
+ return BB->getTerminatingDeoptimizeCall();
+ });
+ }
+ }
+
// Only peel loops that contain a single exit
if (!L->getExitingBlock() || !L->getUniqueExitBlock())
return false;
@@ -363,41 +382,89 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
unsigned IterNumber, unsigned AvgIters,
uint64_t &PeeledHeaderWeight) {
+ if (!PeeledHeaderWeight)
+ return;
// FIXME: Pick a more realistic distribution.
// Currently the proportion of weight we assign to the fall-through
// side of the branch drops linearly with the iteration number, and we use
// a 0.9 fudge factor to make the drop-off less sharp...
- if (PeeledHeaderWeight) {
- uint64_t FallThruWeight =
- PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
- uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
- PeeledHeaderWeight -= ExitWeight;
-
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
- MDBuilder MDB(LatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
- : MDB.createBranchWeights(FallThruWeight, ExitWeight);
- LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
- }
+ uint64_t FallThruWeight =
+ PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
+ uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
+ PeeledHeaderWeight -= ExitWeight;
+
+ unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(LatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
+ : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+}
+
+/// Initialize the weights.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param AvgIters The average number of iterations we expect the loop to have.
+/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.
+/// \param[out] CurHeaderWeight The # of times the header is executed.
+static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+ unsigned AvgIters, uint64_t &ExitWeight,
+ uint64_t &CurHeaderWeight) {
+ uint64_t TrueWeight, FalseWeight;
+ if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
+ return;
+ unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+ ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
+ // The # of times the loop body executes is the sum of the exit block
+ // is taken and the # of times the backedges are taken.
+ CurHeaderWeight = TrueWeight + FalseWeight;
+}
+
+/// Update the weights of original Latch block after peeling off all iterations.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param ExitWeight The weight of the edge from Latch to Exit block.
+/// \param CurHeaderWeight The # of time the header is executed.
+static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+ uint64_t ExitWeight, uint64_t CurHeaderWeight) {
+ // Adjust the branch weights on the loop exit.
+ if (!ExitWeight)
+ return;
+
+ // The backedge count is the difference of current header weight and
+ // current loop exit weight. If the current header weight is smaller than
+ // the current loop exit weight, we mark the loop backedge weight as 1.
+ uint64_t BackEdgeWeight = 0;
+ if (ExitWeight < CurHeaderWeight)
+ BackEdgeWeight = CurHeaderWeight - ExitWeight;
+ else
+ BackEdgeWeight = 1;
+ MDBuilder MDB(LatchBR->getContext());
+ unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
}
/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
/// peeled off.
-/// \param Exit The exit block of the original loop.
+/// \param ExitEdges The exit edges of the original loop.
/// \param[out] NewBlocks A list of the blocks in the newly created clone
/// \param[out] VMap The value map between the loop and the new clone.
/// \param LoopBlocks A helper for DFS-traversal of the loop.
/// \param LVMap A value-map that maps instructions from the original loop to
/// instructions in the last peeled-off iteration.
-static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
- BasicBlock *InsertBot, BasicBlock *Exit,
- SmallVectorImpl<BasicBlock *> &NewBlocks,
- LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- ValueToValueMapTy &LVMap, DominatorTree *DT,
- LoopInfo *LI) {
+static void cloneLoopBlocks(
+ Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
+ SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
+ SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
+ LoopInfo *LI) {
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
BasicBlock *PreHeader = L->getLoopPreheader();
@@ -443,9 +510,11 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
// iteration (for every other iteration)
BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
- LatchBR->setSuccessor(HeaderIdx, InsertBot);
- LatchBR->setSuccessor(1 - HeaderIdx, Exit);
+ for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
+ if (LatchBR->getSuccessor(idx) == Header) {
+ LatchBR->setSuccessor(idx, InsertBot);
+ break;
+ }
if (DT)
DT->changeImmediateDominator(InsertBot, NewLatch);
@@ -476,14 +545,14 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
// we've just created. Note that this must happen *after* the incoming
// values are adjusted, since the value going out of the latch may also be
// a value coming into the header.
- for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) {
- PHINode *PHI = cast<PHINode>(I);
- Value *LatchVal = PHI->getIncomingValueForBlock(Latch);
- Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
- if (LatchInst && L->contains(LatchInst))
- LatchVal = VMap[LatchVal];
- PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch]));
- }
+ for (auto Edge : ExitEdges)
+ for (PHINode &PHI : Edge.second->phis()) {
+ Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
+ Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+ if (LatchInst && L->contains(LatchInst))
+ LatchVal = VMap[LatchVal];
+ PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
+ }
// LastValueMap is updated with the values for the current loop
// which are used the next time this function is called.
@@ -512,7 +581,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
BasicBlock *Header = L->getHeader();
BasicBlock *PreHeader = L->getLoopPreheader();
BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Exit = L->getUniqueExitBlock();
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
+ L->getExitEdges(ExitEdges);
+
+ DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
+ if (DT) {
+ assert(L->hasDedicatedExits() && "No dedicated exits?");
+ for (auto Edge : ExitEdges) {
+ if (ExitIDom.count(Edge.second))
+ continue;
+ BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
+ assert(L->contains(BB) && "IDom is not in a loop");
+ ExitIDom[Edge.second] = BB;
+ }
+ }
Function *F = Header->getParent();
@@ -577,16 +659,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// newly created branches.
BranchInst *LatchBR =
cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
-
- uint64_t TrueWeight, FalseWeight;
uint64_t ExitWeight = 0, CurHeaderWeight = 0;
- if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
- ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
- // The # of times the loop body executes is the sum of the exit block
- // weight and the # of times the backedges are taken.
- CurHeaderWeight = TrueWeight + FalseWeight;
- }
+ initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);
// For each peeled-off iteration, make a copy of the loop.
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
@@ -602,8 +676,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
else
CurHeaderWeight = 1;
- cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
- NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
+ cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
+ LoopBlocks, VMap, LVMap, DT, LI);
// Remap to use values from the current iteration instead of the
// previous one.
@@ -614,7 +688,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// latter is the first cloned loop body, as original PreHeader dominates
// the original loop body.
if (Iter == 0)
- DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+ for (auto Exit : ExitIDom)
+ DT->changeImmediateDominator(Exit.first,
+ cast<BasicBlock>(LVMap[Exit.second]));
#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
#endif
@@ -645,36 +721,22 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
if (LatchInst && L->contains(LatchInst))
NewVal = LVMap[LatchInst];
- PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
+ PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
}
- // Adjust the branch weights on the loop exit.
- if (ExitWeight) {
- // The backedge count is the difference of current header weight and
- // current loop exit weight. If the current header weight is smaller than
- // the current loop exit weight, we mark the loop backedge weight as 1.
- uint64_t BackEdgeWeight = 0;
- if (ExitWeight < CurHeaderWeight)
- BackEdgeWeight = CurHeaderWeight - ExitWeight;
- else
- BackEdgeWeight = 1;
- MDBuilder MDB(LatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
- LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
- }
+ fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);
- // If the loop is nested, we changed the parent loop, update SE.
- if (Loop *ParentLoop = L->getParentLoop()) {
- SE->forgetLoop(ParentLoop);
+ if (Loop *ParentLoop = L->getParentLoop())
+ L = ParentLoop;
- // FIXME: Incrementally update loop-simplify
- simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
- } else {
- // FIXME: Incrementally update loop-simplify
- simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
- }
+ // We modified the loop, update SE.
+ SE->forgetTopmostLoop(L);
+
+ // Finally DomtTree must be correct.
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
NumPeeled++;