diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
commit | eb11fae6d08f479c0799db45860a98af528fa6e7 (patch) | |
tree | 44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Transforms/Utils/LoopUnrollPeel.cpp | |
parent | b8a2042aa938069e862750553db0e4d82d25822c (diff) |
Notes
Diffstat (limited to 'lib/Transforms/Utils/LoopUnrollPeel.cpp')
-rw-r--r-- | lib/Transforms/Utils/LoopUnrollPeel.cpp | 182 |
1 files changed, 147 insertions, 35 deletions
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index c84ae7d693d7..13794c53f24b 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" @@ -30,6 +31,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -46,6 +48,7 @@ #include <limits> using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-unroll" @@ -66,7 +69,7 @@ static const unsigned InfiniteIterationsToInvariance = std::numeric_limits<unsigned>::max(); // Check whether we are capable of peeling this loop. -static bool canPeel(Loop *L) { +bool llvm::canPeel(Loop *L) { // Make sure the loop is in simplified form if (!L->isLoopSimplifyForm()) return false; @@ -136,11 +139,109 @@ static unsigned calculateIterationsToInvariance( return ToInvariance; } +// Return the number of iterations to peel off that make conditions in the +// body true/false. For example, if we peel 2 iterations off the loop below, +// the condition i < 2 can be evaluated at compile time. +// for (i = 0; i < n; i++) +// if (i < 2) +// .. +// else +// .. +// } +static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, + ScalarEvolution &SE) { + assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); + unsigned DesiredPeelCount = 0; + + for (auto *BB : L.blocks()) { + auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || BI->isUnconditional()) + continue; + + // Ignore loop exit condition. + if (L.getLoopLatch() == BB) + continue; + + Value *Condition = BI->getCondition(); + Value *LeftVal, *RightVal; + CmpInst::Predicate Pred; + if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) + continue; + + const SCEV *LeftSCEV = SE.getSCEV(LeftVal); + const SCEV *RightSCEV = SE.getSCEV(RightVal); + + // Do not consider predicates that are known to be true or false + // independently of the loop iteration. + if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) || + SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV, + RightSCEV)) + continue; + + // Check if we have a condition with one AddRec and one non AddRec + // expression. Normalize LeftSCEV to be the AddRec. + if (!isa<SCEVAddRecExpr>(LeftSCEV)) { + if (isa<SCEVAddRecExpr>(RightSCEV)) { + std::swap(LeftSCEV, RightSCEV); + Pred = ICmpInst::getSwappedPredicate(Pred); + } else + continue; + } + + const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV); + + // Avoid huge SCEV computations in the loop below, make sure we only + // consider AddRecs of the loop we are trying to peel and avoid + // non-monotonic predicates, as we will not be able to simplify the loop + // body. + // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can + // simplify the loop, if we peel 1 additional iteration, if there + // is no wrapping. + bool Increasing; + if (!LeftAR->isAffine() || LeftAR->getLoop() != &L || + !SE.isMonotonicPredicate(LeftAR, Pred, Increasing)) + continue; + (void)Increasing; + + // Check if extending the current DesiredPeelCount lets us evaluate Pred + // or !Pred in the loop body statically. + unsigned NewPeelCount = DesiredPeelCount; + + const SCEV *IterVal = LeftAR->evaluateAtIteration( + SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE); + + // If the original condition is not known, get the negated predicate + // (which holds on the else branch) and check if it is known. This allows + // us to peel of iterations that make the original condition false. + if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV)) + Pred = ICmpInst::getInversePredicate(Pred); + + const SCEV *Step = LeftAR->getStepRecurrence(SE); + while (NewPeelCount < MaxPeelCount && + SE.isKnownPredicate(Pred, IterVal, RightSCEV)) { + IterVal = SE.getAddExpr(IterVal, Step); + NewPeelCount++; + } + + // Only peel the loop if the monotonic predicate !Pred becomes known in the + // first iteration of the loop body after peeling. + if (NewPeelCount > DesiredPeelCount && + SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, + RightSCEV)) + DesiredPeelCount = NewPeelCount; + } + + return DesiredPeelCount; +} + // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, - unsigned &TripCount) { + unsigned &TripCount, ScalarEvolution &SE) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); + // Save the UP.PeelCount value set by the target in + // TTI.getUnrollingPreferences or by the flag -unroll-peel-count. + unsigned TargetPeelCount = UP.PeelCount; UP.PeelCount = 0; if (!canPeel(L)) return; @@ -149,6 +250,19 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!L->empty()) return; + // If the user provided a peel count, use that. + bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; + if (UserPeelCount) { + LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount + << " iterations.\n"); + UP.PeelCount = UnrollForcePeelCount; + return; + } + + // Skip peeling if it's disabled. + if (!UP.AllowPeeling) + return; + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N // iterations of the loop. For this we compute the number for iterations after // which every Phi is guaranteed to become an invariant, and try to peel the @@ -160,7 +274,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, SmallDenseMap<PHINode *, unsigned> IterationsToInvariance; // Now go through all Phis to calculate their the number of iterations they // need to become invariants. - unsigned DesiredPeelCount = 0; + // Start the max computation with the UP.PeelCount value set by the target + // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count. + unsigned DesiredPeelCount = TargetPeelCount; BasicBlock *BackEdge = L->getLoopLatch(); assert(BackEdge && "Loop is not in simplified form?"); for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) { @@ -170,15 +286,21 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (ToInvariance != InfiniteIterationsToInvariance) DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance); } + + // Pay respect to limitations implied by loop size and the max peel count. + unsigned MaxPeelCount = UnrollPeelMaxCount; + MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + + DesiredPeelCount = std::max(DesiredPeelCount, + countToEliminateCompares(*L, MaxPeelCount, SE)); + if (DesiredPeelCount > 0) { - // Pay respect to limitations implied by loop size and the max peel count. - unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); // Consider max peel count limitation. assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); - DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn" - << " some Phis into invariants.\n"); + LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount + << " iteration(s) to turn" + << " some Phis into invariants.\n"); UP.PeelCount = DesiredPeelCount; return; } @@ -189,44 +311,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (TripCount) return; - // If the user provided a peel count, use that. - bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; - if (UserPeelCount) { - DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount - << " iterations.\n"); - UP.PeelCount = UnrollForcePeelCount; - return; - } - // If we don't know the trip count, but have reason to believe the average // trip count is low, peeling should be beneficial, since we will usually // hit the peeled section. // We only do this in the presence of profile information, since otherwise // our estimates of the trip count are not reliable enough. - if (UP.AllowPeeling && L->getHeader()->getParent()->hasProfileData()) { + if (L->getHeader()->getParent()->hasProfileData()) { Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L); if (!PeelCount) return; - DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount - << "\n"); + LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount + << "\n"); if (*PeelCount) { if ((*PeelCount <= UnrollPeelMaxCount) && (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { - DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); + LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount + << " iterations.\n"); UP.PeelCount = *PeelCount; return; } - DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); - DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); - DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); - DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); + LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); + LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); + LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) + << "\n"); + LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); } } } -/// \brief Update the branch weights of the latch of a peeled-off loop +/// Update the branch weights of the latch of a peeled-off loop /// iteration. /// This sets the branch weights for the latch of the recently peeled off loop /// iteration correctly. @@ -267,12 +382,12 @@ static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, } } -/// \brief Clones the body of the loop L, putting it between \p InsertTop and \p +/// Clones the body of the loop L, putting it between \p InsertTop and \p /// InsertBot. /// \param IterNumber The serial number of the iteration currently being /// peeled off. /// \param Exit The exit block of the original loop. -/// \param[out] NewBlocks A list of the the blocks in the newly created clone +/// \param[out] NewBlocks A list of the blocks in the newly created clone /// \param[out] VMap The value map between the loop and the new clone. /// \param LoopBlocks A helper for DFS-traversal of the loop. /// \param LVMap A value-map that maps instructions from the original loop to @@ -376,7 +491,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, LVMap[KV.first] = KV.second; } -/// \brief Peel off the first \p PeelCount iterations of loop \p L. +/// Peel off the first \p PeelCount iterations of loop \p L. /// /// Note that this does not peel them off as a single straight-line block. /// Rather, each iteration is peeled off separately, and needs to check the @@ -388,8 +503,8 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA) { - if (!canPeel(L)) - return false; + assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); + assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); @@ -500,10 +615,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // the original loop body. if (Iter == 0) DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch])); -#ifndef NDEBUG - if (VerifyDomInfo) - DT->verifyDomTree(); -#endif + assert(DT->verify(DominatorTree::VerificationLevel::Fast)); } updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter, |