summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/LoopUnrollPass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopUnrollPass.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp128
1 files changed, 82 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 2fa7436213dd5..a6d4164c36455 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -178,7 +178,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
- Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
+ Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
+ Optional<bool> UserAllowProfileBasedPeeling,
+ Optional<unsigned> UserFullUnrollMaxCount) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
@@ -202,6 +204,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.UpperBound = false;
UP.AllowPeeling = true;
UP.UnrollAndJam = false;
+ UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
// Override with any target specific settings
@@ -257,6 +260,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.UpperBound = *UserUpperBound;
if (UserAllowPeeling.hasValue())
UP.AllowPeeling = *UserAllowPeeling;
+ if (UserAllowProfileBasedPeeling.hasValue())
+ UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
+ if (UserFullUnrollMaxCount.hasValue())
+ UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
return UP;
}
@@ -730,7 +737,7 @@ bool llvm::computeUnrollCount(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
- unsigned &TripMultiple, unsigned LoopSize,
+ bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
// Check for explicit Count.
@@ -781,18 +788,34 @@ bool llvm::computeUnrollCount(
// Also we need to check if we exceed FullUnrollMaxCount.
// If using the upper bound to unroll, TripMultiple should be set to 1 because
// we do not know when loop may exit.
- // MaxTripCount and ExactTripCount cannot both be non zero since we only
+
+ // We can unroll by the upper bound amount if it's generally allowed or if
+ // we know that the loop is executed either the upper bound or zero times.
+ // (MaxOrZero unrolling keeps only the first loop test, so the number of
+ // loop tests remains the same compared to the non-unrolled version, whereas
+ // the generic upper bound unrolling keeps all but the last loop test so the
+ // number of loop tests goes up which may end up being worse on targets with
+ // constrained branch predictor resources so is controlled by an option.)
+ // In addition we only unroll small upper bounds.
+ unsigned FullUnrollMaxTripCount = MaxTripCount;
+ if (!(UP.UpperBound || MaxOrZero) ||
+ FullUnrollMaxTripCount > UnrollMaxUpperBound)
+ FullUnrollMaxTripCount = 0;
+
+ // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only
// compute the former when the latter is zero.
unsigned ExactTripCount = TripCount;
- assert((ExactTripCount == 0 || MaxTripCount == 0) &&
- "ExtractTripCount and MaxTripCount cannot both be non zero.");
- unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
+ assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) &&
+ "ExtractTripCount and UnrollByMaxCount cannot both be non zero.");
+
+ unsigned FullUnrollTripCount =
+ ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
UP.Count = FullUnrollTripCount;
if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
- UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
TripCount = FullUnrollTripCount;
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
@@ -806,7 +829,7 @@ bool llvm::computeUnrollCount(
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
TripCount = FullUnrollTripCount;
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
@@ -882,6 +905,8 @@ bool llvm::computeUnrollCount(
"because "
"unrolled size is too large.";
});
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
+ << "\n");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
@@ -903,6 +928,12 @@ bool llvm::computeUnrollCount(
return false;
}
+ // Don't unroll a small upper bound loop unless user or TTI asked to do so.
+ if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
+ UP.Count = 0;
+ return false;
+ }
+
// Check if the runtime trip count is too small when profile is available.
if (L->getHeader()->getParent()->hasProfileData()) {
if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
@@ -966,7 +997,11 @@ bool llvm::computeUnrollCount(
if (UP.Count > UP.MaxCount)
UP.Count = UP.MaxCount;
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
+
+ if (MaxTripCount && UP.Count > MaxTripCount)
+ UP.Count = MaxTripCount;
+
+ LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
<< "\n");
if (UP.Count < 2)
UP.Count = 0;
@@ -976,13 +1011,14 @@ bool llvm::computeUnrollCount(
static LoopUnrollResult tryToUnrollLoop(
Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
- OptimizationRemarkEmitter &ORE,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
- bool PreserveLCSSA, int OptLevel,
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
- Optional<bool> ProvidedAllowPeeling) {
+ Optional<bool> ProvidedAllowPeeling,
+ Optional<bool> ProvidedAllowProfileBasedPeeling,
+ Optional<unsigned> ProvidedFullUnrollMaxCount) {
LLVM_DEBUG(dbgs() << "Loop Unroll: F["
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
@@ -1007,7 +1043,8 @@ static LoopUnrollResult tryToUnrollLoop(
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
- ProvidedAllowPeeling);
+ ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
+ ProvidedFullUnrollMaxCount);
// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
// as threshold later on.
@@ -1028,10 +1065,10 @@ static LoopUnrollResult tryToUnrollLoop(
return LoopUnrollResult::Unmodified;
}
- // When optimizing for size, use LoopSize as threshold, to (fully) unroll
- // loops, if it does not increase code size.
+ // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
+ // later), to (fully) unroll loops, if it does not increase code size.
if (OptForSize)
- UP.Threshold = std::max(UP.Threshold, LoopSize);
+ UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
if (NumInlineCandidates != 0) {
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
@@ -1040,7 +1077,6 @@ static LoopUnrollResult tryToUnrollLoop(
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
- unsigned MaxTripCount = 0;
unsigned TripMultiple = 1;
// If there are multiple exiting blocks but one of them is the latch, use the
// latch for the trip count estimation. Otherwise insist on a single exiting
@@ -1070,28 +1106,18 @@ static LoopUnrollResult tryToUnrollLoop(
// Try to find the trip count upper bound if we cannot find the exact trip
// count.
+ unsigned MaxTripCount = 0;
bool MaxOrZero = false;
if (!TripCount) {
MaxTripCount = SE.getSmallConstantMaxTripCount(L);
MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
- // We can unroll by the upper bound amount if it's generally allowed or if
- // we know that the loop is executed either the upper bound or zero times.
- // (MaxOrZero unrolling keeps only the first loop test, so the number of
- // loop tests remains the same compared to the non-unrolled version, whereas
- // the generic upper bound unrolling keeps all but the last loop test so the
- // number of loop tests goes up which may end up being worse on targets with
- // constrained branch predictor resources so is controlled by an option.)
- // In addition we only unroll small upper bounds.
- if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
- MaxTripCount = 0;
- }
}
// computeUnrollCount() decides whether it is beneficial to use upper bound to
// fully unroll the loop.
bool UseUpperBound = false;
bool IsCountSetExplicitly = computeUnrollCount(
- L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
+ L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
TripMultiple, LoopSize, UP, UseUpperBound);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
@@ -1139,7 +1165,7 @@ static LoopUnrollResult tryToUnrollLoop(
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
- (IsCountSetExplicitly || UP.PeelCount))
+ (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
L->setLoopAlreadyUnrolled();
return UnrollResult;
@@ -1169,18 +1195,24 @@ public:
Optional<bool> ProvidedRuntime;
Optional<bool> ProvidedUpperBound;
Optional<bool> ProvidedAllowPeeling;
+ Optional<bool> ProvidedAllowProfileBasedPeeling;
+ Optional<unsigned> ProvidedFullUnrollMaxCount;
LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None,
- Optional<bool> AllowPeeling = None)
+ Optional<bool> AllowPeeling = None,
+ Optional<bool> AllowProfileBasedPeeling = None,
+ Optional<unsigned> ProvidedFullUnrollMaxCount = None)
: LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
- ProvidedAllowPeeling(AllowPeeling) {
+ ProvidedAllowPeeling(AllowPeeling),
+ ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
+ ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -1203,10 +1235,11 @@ public:
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
LoopUnrollResult Result = tryToUnrollLoop(
- L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
- PreserveLCSSA, OptLevel, OnlyWhenForced,
- ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
- ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
+ L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
+ OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
+ ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
+ ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
+ ProvidedFullUnrollMaxCount);
if (Result == LoopUnrollResult::FullyUnrolled)
LPM.markLoopAsDeleted(*L);
@@ -1283,14 +1316,16 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
std::string LoopName = L.getName();
- bool Changed =
- tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
- /*BFI*/ nullptr, /*PSI*/ nullptr,
- /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
- ForgetSCEV, /*Count*/ None,
- /*Threshold*/ None, /*AllowPartial*/ false,
- /*Runtime*/ false, /*UpperBound*/ false,
- /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
+ bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
+ /*BFI*/ nullptr, /*PSI*/ nullptr,
+ /*PreserveLCSSA*/ true, OptLevel,
+ OnlyWhenForced, ForgetSCEV, /*Count*/ None,
+ /*Threshold*/ None, /*AllowPartial*/ false,
+ /*Runtime*/ false, /*UpperBound*/ false,
+ /*AllowPeeling*/ false,
+ /*AllowProfileBasedPeeling*/ false,
+ /*FullUnrollMaxCount*/ None) !=
+ LoopUnrollResult::Unmodified;
if (!Changed)
return PreservedAnalyses::all();
@@ -1430,7 +1465,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
UnrollOpts.ForgetSCEV, /*Count*/ None,
/*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
- UnrollOpts.AllowUpperBound, LocalAllowPeeling);
+ UnrollOpts.AllowUpperBound, LocalAllowPeeling,
+ UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
Changed |= Result != LoopUnrollResult::Unmodified;
// The parent must not be damaged by unrolling!