summaryrefslogtreecommitdiff
path: root/lib/Transforms/Utils/LoopUnrollPeel.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2018-07-28 10:51:19 +0000
committerDimitry Andric <dim@FreeBSD.org>2018-07-28 10:51:19 +0000
commiteb11fae6d08f479c0799db45860a98af528fa6e7 (patch)
tree44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Transforms/Utils/LoopUnrollPeel.cpp
parentb8a2042aa938069e862750553db0e4d82d25822c (diff)
Notes
Diffstat (limited to 'lib/Transforms/Utils/LoopUnrollPeel.cpp')
-rw-r--r--lib/Transforms/Utils/LoopUnrollPeel.cpp182
1 files changed, 147 insertions, 35 deletions
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index c84ae7d693d7..13794c53f24b 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
@@ -30,6 +31,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -46,6 +48,7 @@
#include <limits>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-unroll"
@@ -66,7 +69,7 @@ static const unsigned InfiniteIterationsToInvariance =
std::numeric_limits<unsigned>::max();
// Check whether we are capable of peeling this loop.
-static bool canPeel(Loop *L) {
+bool llvm::canPeel(Loop *L) {
// Make sure the loop is in simplified form
if (!L->isLoopSimplifyForm())
return false;
@@ -136,11 +139,109 @@ static unsigned calculateIterationsToInvariance(
return ToInvariance;
}
+// Return the number of iterations to peel off that make conditions in the
+// body true/false. For example, if we peel 2 iterations off the loop below,
+// the condition i < 2 can be evaluated at compile time.
+// for (i = 0; i < n; i++)
+// if (i < 2)
+// ..
+// else
+// ..
+// }
+static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
+ ScalarEvolution &SE) {
+ assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
+ unsigned DesiredPeelCount = 0;
+
+ for (auto *BB : L.blocks()) {
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || BI->isUnconditional())
+ continue;
+
+ // Ignore loop exit condition.
+ if (L.getLoopLatch() == BB)
+ continue;
+
+ Value *Condition = BI->getCondition();
+ Value *LeftVal, *RightVal;
+ CmpInst::Predicate Pred;
+ if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
+ continue;
+
+ const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
+ const SCEV *RightSCEV = SE.getSCEV(RightVal);
+
+ // Do not consider predicates that are known to be true or false
+ // independently of the loop iteration.
+ if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
+ SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV,
+ RightSCEV))
+ continue;
+
+ // Check if we have a condition with one AddRec and one non AddRec
+ // expression. Normalize LeftSCEV to be the AddRec.
+ if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+ if (isa<SCEVAddRecExpr>(RightSCEV)) {
+ std::swap(LeftSCEV, RightSCEV);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ } else
+ continue;
+ }
+
+ const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
+
+ // Avoid huge SCEV computations in the loop below, make sure we only
+ // consider AddRecs of the loop we are trying to peel and avoid
+ // non-monotonic predicates, as we will not be able to simplify the loop
+ // body.
+ // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
+ // simplify the loop, if we peel 1 additional iteration, if there
+ // is no wrapping.
+ bool Increasing;
+ if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
+ !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
+ continue;
+ (void)Increasing;
+
+ // Check if extending the current DesiredPeelCount lets us evaluate Pred
+ // or !Pred in the loop body statically.
+ unsigned NewPeelCount = DesiredPeelCount;
+
+ const SCEV *IterVal = LeftAR->evaluateAtIteration(
+ SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
+
+ // If the original condition is not known, get the negated predicate
+ // (which holds on the else branch) and check if it is known. This allows
+ // us to peel of iterations that make the original condition false.
+ if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+ Pred = ICmpInst::getInversePredicate(Pred);
+
+ const SCEV *Step = LeftAR->getStepRecurrence(SE);
+ while (NewPeelCount < MaxPeelCount &&
+ SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
+ IterVal = SE.getAddExpr(IterVal, Step);
+ NewPeelCount++;
+ }
+
+ // Only peel the loop if the monotonic predicate !Pred becomes known in the
+ // first iteration of the loop body after peeling.
+ if (NewPeelCount > DesiredPeelCount &&
+ SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
+ RightSCEV))
+ DesiredPeelCount = NewPeelCount;
+ }
+
+ return DesiredPeelCount;
+}
+
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
- unsigned &TripCount) {
+ unsigned &TripCount, ScalarEvolution &SE) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
+ // Save the UP.PeelCount value set by the target in
+ // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
+ unsigned TargetPeelCount = UP.PeelCount;
UP.PeelCount = 0;
if (!canPeel(L))
return;
@@ -149,6 +250,19 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!L->empty())
return;
+ // If the user provided a peel count, use that.
+ bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
+ if (UserPeelCount) {
+ LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
+ << " iterations.\n");
+ UP.PeelCount = UnrollForcePeelCount;
+ return;
+ }
+
+ // Skip peeling if it's disabled.
+ if (!UP.AllowPeeling)
+ return;
+
// Here we try to get rid of Phis which become invariants after 1, 2, ..., N
// iterations of the loop. For this we compute the number for iterations after
// which every Phi is guaranteed to become an invariant, and try to peel the
@@ -160,7 +274,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
// Now go through all Phis to calculate their the number of iterations they
// need to become invariants.
- unsigned DesiredPeelCount = 0;
+ // Start the max computation with the UP.PeelCount value set by the target
+ // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
+ unsigned DesiredPeelCount = TargetPeelCount;
BasicBlock *BackEdge = L->getLoopLatch();
assert(BackEdge && "Loop is not in simplified form?");
for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
@@ -170,15 +286,21 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (ToInvariance != InfiniteIterationsToInvariance)
DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
}
+
+ // Pay respect to limitations implied by loop size and the max peel count.
+ unsigned MaxPeelCount = UnrollPeelMaxCount;
+ MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+
+ DesiredPeelCount = std::max(DesiredPeelCount,
+ countToEliminateCompares(*L, MaxPeelCount, SE));
+
if (DesiredPeelCount > 0) {
- // Pay respect to limitations implied by loop size and the max peel count.
- unsigned MaxPeelCount = UnrollPeelMaxCount;
- MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
// Consider max peel count limitation.
assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
- DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
- << " some Phis into invariants.\n");
+ LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
+ << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
UP.PeelCount = DesiredPeelCount;
return;
}
@@ -189,44 +311,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (TripCount)
return;
- // If the user provided a peel count, use that.
- bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
- if (UserPeelCount) {
- DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
- << " iterations.\n");
- UP.PeelCount = UnrollForcePeelCount;
- return;
- }
-
// If we don't know the trip count, but have reason to believe the average
// trip count is low, peeling should be beneficial, since we will usually
// hit the peeled section.
// We only do this in the presence of profile information, since otherwise
// our estimates of the trip count are not reliable enough.
- if (UP.AllowPeeling && L->getHeader()->getParent()->hasProfileData()) {
+ if (L->getHeader()->getParent()->hasProfileData()) {
Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
if (!PeelCount)
return;
- DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
- << "\n");
+ LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
+ << "\n");
if (*PeelCount) {
if ((*PeelCount <= UnrollPeelMaxCount) &&
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
- DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n");
+ LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
+ << " iterations.\n");
UP.PeelCount = *PeelCount;
return;
}
- DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
- DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
- DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n");
- DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
+ LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
+ LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
+ << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
}
}
}
-/// \brief Update the branch weights of the latch of a peeled-off loop
+/// Update the branch weights of the latch of a peeled-off loop
/// iteration.
/// This sets the branch weights for the latch of the recently peeled off loop
/// iteration correctly.
@@ -267,12 +382,12 @@ static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
}
}
-/// \brief Clones the body of the loop L, putting it between \p InsertTop and \p
+/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
/// peeled off.
/// \param Exit The exit block of the original loop.
-/// \param[out] NewBlocks A list of the the blocks in the newly created clone
+/// \param[out] NewBlocks A list of the blocks in the newly created clone
/// \param[out] VMap The value map between the loop and the new clone.
/// \param LoopBlocks A helper for DFS-traversal of the loop.
/// \param LVMap A value-map that maps instructions from the original loop to
@@ -376,7 +491,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
LVMap[KV.first] = KV.second;
}
-/// \brief Peel off the first \p PeelCount iterations of loop \p L.
+/// Peel off the first \p PeelCount iterations of loop \p L.
///
/// Note that this does not peel them off as a single straight-line block.
/// Rather, each iteration is peeled off separately, and needs to check the
@@ -388,8 +503,8 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC, bool PreserveLCSSA) {
- if (!canPeel(L))
- return false;
+ assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
+ assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
LoopBlocksDFS LoopBlocks(L);
LoopBlocks.perform(LI);
@@ -500,10 +615,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// the original loop body.
if (Iter == 0)
DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
-#ifndef NDEBUG
- if (VerifyDomInfo)
- DT->verifyDomTree();
-#endif
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast));
}
updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,