diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopUnrollPass.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopUnrollPass.cpp | 128 |
1 files changed, 82 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 2fa7436213dd5..a6d4164c36455 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -178,7 +178,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, - Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) { + Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling, + Optional<bool> UserAllowProfileBasedPeeling, + Optional<unsigned> UserFullUnrollMaxCount) { TargetTransformInfo::UnrollingPreferences UP; // Set up the defaults @@ -202,6 +204,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.UpperBound = false; UP.AllowPeeling = true; UP.UnrollAndJam = false; + UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; // Override with any target specific settings @@ -257,6 +260,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.UpperBound = *UserUpperBound; if (UserAllowPeeling.hasValue()) UP.AllowPeeling = *UserAllowPeeling; + if (UserAllowProfileBasedPeeling.hasValue()) + UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; + if (UserFullUnrollMaxCount.hasValue()) + UP.FullUnrollMaxCount = *UserFullUnrollMaxCount; return UP; } @@ -730,7 +737,7 @@ bool llvm::computeUnrollCount( Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, - unsigned &TripMultiple, unsigned LoopSize, + bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { // Check for explicit Count. @@ -781,18 +788,34 @@ bool llvm::computeUnrollCount( // Also we need to check if we exceed FullUnrollMaxCount. // If using the upper bound to unroll, TripMultiple should be set to 1 because // we do not know when loop may exit. - // MaxTripCount and ExactTripCount cannot both be non zero since we only + + // We can unroll by the upper bound amount if it's generally allowed or if + // we know that the loop is executed either the upper bound or zero times. + // (MaxOrZero unrolling keeps only the first loop test, so the number of + // loop tests remains the same compared to the non-unrolled version, whereas + // the generic upper bound unrolling keeps all but the last loop test so the + // number of loop tests goes up which may end up being worse on targets with + // constrained branch predictor resources so is controlled by an option.) + // In addition we only unroll small upper bounds. + unsigned FullUnrollMaxTripCount = MaxTripCount; + if (!(UP.UpperBound || MaxOrZero) || + FullUnrollMaxTripCount > UnrollMaxUpperBound) + FullUnrollMaxTripCount = 0; + + // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only // compute the former when the latter is zero. unsigned ExactTripCount = TripCount; - assert((ExactTripCount == 0 || MaxTripCount == 0) && - "ExtractTripCount and MaxTripCount cannot both be non zero."); - unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount; + assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && + "ExtractTripCount and UnrollByMaxCount cannot both be non zero."); + + unsigned FullUnrollTripCount = + ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; UP.Count = FullUnrollTripCount; if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { // When computing the unrolled size, note that BEInsns are not replicated // like the rest of the loop body. if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) { - UseUpperBound = (MaxTripCount == FullUnrollTripCount); + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 1 : TripMultiple; return ExplicitUnroll; @@ -806,7 +829,7 @@ bool llvm::computeUnrollCount( unsigned Boost = getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { - UseUpperBound = (MaxTripCount == FullUnrollTripCount); + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 1 : TripMultiple; return ExplicitUnroll; @@ -882,6 +905,8 @@ bool llvm::computeUnrollCount( "because " "unrolled size is too large."; }); + LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count + << "\n"); return ExplicitUnroll; } assert(TripCount == 0 && @@ -903,6 +928,12 @@ bool llvm::computeUnrollCount( return false; } + // Don't unroll a small upper bound loop unless user or TTI asked to do so. + if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) { + UP.Count = 0; + return false; + } + // Check if the runtime trip count is too small when profile is available. if (L->getHeader()->getParent()->hasProfileData()) { if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) { @@ -966,7 +997,11 @@ bool llvm::computeUnrollCount( if (UP.Count > UP.MaxCount) UP.Count = UP.MaxCount; - LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count + + if (MaxTripCount && UP.Count > MaxTripCount) + UP.Count = MaxTripCount; + + LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count << "\n"); if (UP.Count < 2) UP.Count = 0; @@ -976,13 +1011,14 @@ bool llvm::computeUnrollCount( static LoopUnrollResult tryToUnrollLoop( Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, - OptimizationRemarkEmitter &ORE, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - bool PreserveLCSSA, int OptLevel, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound, - Optional<bool> ProvidedAllowPeeling) { + Optional<bool> ProvidedAllowPeeling, + Optional<bool> ProvidedAllowProfileBasedPeeling, + Optional<unsigned> ProvidedFullUnrollMaxCount) { LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); @@ -1007,7 +1043,8 @@ static LoopUnrollResult tryToUnrollLoop( TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, - ProvidedAllowPeeling); + ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, + ProvidedFullUnrollMaxCount); // Exit early if unrolling is disabled. For OptForSize, we pick the loop size // as threshold later on. @@ -1028,10 +1065,10 @@ static LoopUnrollResult tryToUnrollLoop( return LoopUnrollResult::Unmodified; } - // When optimizing for size, use LoopSize as threshold, to (fully) unroll - // loops, if it does not increase code size. + // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold + // later), to (fully) unroll loops, if it does not increase code size. if (OptForSize) - UP.Threshold = std::max(UP.Threshold, LoopSize); + UP.Threshold = std::max(UP.Threshold, LoopSize + 1); if (NumInlineCandidates != 0) { LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); @@ -1040,7 +1077,6 @@ static LoopUnrollResult tryToUnrollLoop( // Find trip count and trip multiple if count is not available unsigned TripCount = 0; - unsigned MaxTripCount = 0; unsigned TripMultiple = 1; // If there are multiple exiting blocks but one of them is the latch, use the // latch for the trip count estimation. Otherwise insist on a single exiting @@ -1070,28 +1106,18 @@ static LoopUnrollResult tryToUnrollLoop( // Try to find the trip count upper bound if we cannot find the exact trip // count. + unsigned MaxTripCount = 0; bool MaxOrZero = false; if (!TripCount) { MaxTripCount = SE.getSmallConstantMaxTripCount(L); MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L); - // We can unroll by the upper bound amount if it's generally allowed or if - // we know that the loop is executed either the upper bound or zero times. - // (MaxOrZero unrolling keeps only the first loop test, so the number of - // loop tests remains the same compared to the non-unrolled version, whereas - // the generic upper bound unrolling keeps all but the last loop test so the - // number of loop tests goes up which may end up being worse on targets with - // constrained branch predictor resources so is controlled by an option.) - // In addition we only unroll small upper bounds. - if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) { - MaxTripCount = 0; - } } // computeUnrollCount() decides whether it is beneficial to use upper bound to // fully unroll the loop. bool UseUpperBound = false; bool IsCountSetExplicitly = computeUnrollCount( - L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, + L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, UseUpperBound); if (!UP.Count) return LoopUnrollResult::Unmodified; @@ -1139,7 +1165,7 @@ static LoopUnrollResult tryToUnrollLoop( // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. if (UnrollResult != LoopUnrollResult::FullyUnrolled && - (IsCountSetExplicitly || UP.PeelCount)) + (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount))) L->setLoopAlreadyUnrolled(); return UnrollResult; @@ -1169,18 +1195,24 @@ public: Optional<bool> ProvidedRuntime; Optional<bool> ProvidedUpperBound; Optional<bool> ProvidedAllowPeeling; + Optional<bool> ProvidedAllowProfileBasedPeeling; + Optional<unsigned> ProvidedFullUnrollMaxCount; LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false, bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None, Optional<unsigned> Count = None, Optional<bool> AllowPartial = None, Optional<bool> Runtime = None, Optional<bool> UpperBound = None, - Optional<bool> AllowPeeling = None) + Optional<bool> AllowPeeling = None, + Optional<bool> AllowProfileBasedPeeling = None, + Optional<unsigned> ProvidedFullUnrollMaxCount = None) : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced), ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound), - ProvidedAllowPeeling(AllowPeeling) { + ProvidedAllowPeeling(AllowPeeling), + ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling), + ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) { initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -1203,10 +1235,11 @@ public: bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); LoopUnrollResult Result = tryToUnrollLoop( - L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, - PreserveLCSSA, OptLevel, OnlyWhenForced, - ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, - ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling); + L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel, + OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, + ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, + ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, + ProvidedFullUnrollMaxCount); if (Result == LoopUnrollResult::FullyUnrolled) LPM.markLoopAsDeleted(*L); @@ -1283,14 +1316,16 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, std::string LoopName = L.getName(); - bool Changed = - tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, - /*BFI*/ nullptr, /*PSI*/ nullptr, - /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, - ForgetSCEV, /*Count*/ None, - /*Threshold*/ None, /*AllowPartial*/ false, - /*Runtime*/ false, /*UpperBound*/ false, - /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified; + bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, + /*BFI*/ nullptr, /*PSI*/ nullptr, + /*PreserveLCSSA*/ true, OptLevel, + OnlyWhenForced, ForgetSCEV, /*Count*/ None, + /*Threshold*/ None, /*AllowPartial*/ false, + /*Runtime*/ false, /*UpperBound*/ false, + /*AllowPeeling*/ false, + /*AllowProfileBasedPeeling*/ false, + /*FullUnrollMaxCount*/ None) != + LoopUnrollResult::Unmodified; if (!Changed) return PreservedAnalyses::all(); @@ -1430,7 +1465,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV, /*Count*/ None, /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, - UnrollOpts.AllowUpperBound, LocalAllowPeeling); + UnrollOpts.AllowUpperBound, LocalAllowPeeling, + UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); Changed |= Result != LoopUnrollResult::Unmodified; // The parent must not be damaged by unrolling! |