summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2020-07-26 19:36:28 +0000
committerDimitry Andric <dim@FreeBSD.org>2020-07-26 19:36:28 +0000
commitcfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
parent706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Notes
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp181
1 files changed, 97 insertions, 84 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 4c2b079c6bb5b..87f40bb7ba852 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -154,6 +154,10 @@ static cl::opt<bool>
cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));
+static cl::opt<bool> UnrollAllowLoopNestsPeeling(
+ "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden,
+ cl::desc("Allows loop nests to be peeled."));
+
static cl::opt<bool> UnrollUnrollRemainder(
"unroll-remainder", cl::Hidden,
cl::desc("Allow the loop remainder to be unrolled."));
@@ -167,6 +171,16 @@ static cl::opt<bool> UnrollRevisitChildLoops(
"This shouldn't typically be needed as child loops (or their "
"clones) were already visited."));
+static cl::opt<unsigned> UnrollThresholdAggressive(
+ "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
+ cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
+ "optimizations"));
+static cl::opt<unsigned>
+ UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
+ cl::Hidden,
+ cl::desc("Default threshold (max size of unrolled "
+ "loop), used in all but O3 optimizations"));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
@@ -179,19 +193,17 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
- Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
- Optional<bool> UserAllowProfileBasedPeeling,
- Optional<unsigned> UserFullUnrollMaxCount) {
+ Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
- UP.Threshold = OptLevel > 2 ? 300 : 150;
+ UP.Threshold =
+ OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault;
UP.MaxPercentThresholdBoost = 400;
UP.OptSizeThreshold = 0;
UP.PartialThreshold = 150;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
- UP.PeelCount = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = std::numeric_limits<unsigned>::max();
UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
@@ -203,10 +215,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
- UP.AllowPeeling = true;
UP.UnrollAndJam = false;
- UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
+ UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
// Override with any target specific settings
TTI.getUnrollingPreferences(L, SE, UP);
@@ -232,8 +243,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxCount = UnrollMaxCount;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
UP.FullUnrollMaxCount = UnrollFullMaxCount;
- if (UnrollPeelCount.getNumOccurrences() > 0)
- UP.PeelCount = UnrollPeelCount;
if (UnrollAllowPartial.getNumOccurrences() > 0)
UP.Partial = UnrollAllowPartial;
if (UnrollAllowRemainder.getNumOccurrences() > 0)
@@ -242,10 +251,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Runtime = UnrollRuntime;
if (UnrollMaxUpperBound == 0)
UP.UpperBound = false;
- if (UnrollAllowPeeling.getNumOccurrences() > 0)
- UP.AllowPeeling = UnrollAllowPeeling;
if (UnrollUnrollRemainder.getNumOccurrences() > 0)
UP.UnrollRemainder = UnrollUnrollRemainder;
+ if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
+ UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
// Apply user values provided by argument
if (UserThreshold.hasValue()) {
@@ -260,16 +269,45 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Runtime = *UserRuntime;
if (UserUpperBound.hasValue())
UP.UpperBound = *UserUpperBound;
- if (UserAllowPeeling.hasValue())
- UP.AllowPeeling = *UserAllowPeeling;
- if (UserAllowProfileBasedPeeling.hasValue())
- UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
if (UserFullUnrollMaxCount.hasValue())
UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
return UP;
}
+TargetTransformInfo::PeelingPreferences
+llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ Optional<bool> UserAllowPeeling,
+ Optional<bool> UserAllowProfileBasedPeeling) {
+ TargetTransformInfo::PeelingPreferences PP;
+
+ // Default values
+ PP.PeelCount = 0;
+ PP.AllowPeeling = true;
+ PP.AllowLoopNestsPeeling = false;
+ PP.PeelProfiledIterations = true;
+
+ // Get Target Specifc Values
+ TTI.getPeelingPreferences(L, SE, PP);
+
+ // User Specified Values using cl::opt
+ if (UnrollPeelCount.getNumOccurrences() > 0)
+ PP.PeelCount = UnrollPeelCount;
+ if (UnrollAllowPeeling.getNumOccurrences() > 0)
+ PP.AllowPeeling = UnrollAllowPeeling;
+ if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
+ PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
+
+ // User Specifed values provided by argument
+ if (UserAllowPeeling.hasValue())
+ PP.AllowPeeling = *UserAllowPeeling;
+ if (UserAllowProfileBasedPeeling.hasValue())
+ PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
+
+ return PP;
+}
+
namespace {
/// A struct to densely store the state of an instruction after unrolling at
@@ -335,11 +373,12 @@ struct EstimatedUnrollCost {
static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
const SmallPtrSetImpl<const Value *> &EphValues,
- const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
+ const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
+ unsigned MaxIterationsCountToAnalyze) {
// We want to be able to scale offsets by the trip count and add more offsets
// to them without checking for overflows, and we already don't want to
// analyze *massive* trip counts, so we force the max to be reasonably small.
- assert(UnrollMaxIterationsCountToAnalyze <
+ assert(MaxIterationsCountToAnalyze <
(unsigned)(std::numeric_limits<int>::max() / 2) &&
"The unroll iterations max is too large!");
@@ -349,8 +388,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
return None;
// Don't simulate loops with a big or unknown tripcount
- if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
- TripCount > UnrollMaxIterationsCountToAnalyze)
+ if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
return None;
SmallSetVector<BasicBlock *, 16> BBWorklist;
@@ -428,7 +466,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// First accumulate the cost of this instruction.
if (!Cost.IsFree) {
- UnrolledCost += TTI.getUserCost(I);
+ UnrolledCost += TTI.getUserCost(I, TargetTransformInfo::TCK_CodeSize);
LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
<< Iteration << "): ");
LLVM_DEBUG(I->dump());
@@ -521,7 +559,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// Track this instruction's expected baseline cost when executing the
// rolled loop form.
- RolledDynamicCost += TTI.getUserCost(&I);
+ RolledDynamicCost += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
// Visit the instruction to analyze its loop cost after unrolling,
// and if the visitor returns true, mark the instruction as free after
@@ -665,32 +703,32 @@ unsigned llvm::ApproximateLoopSize(
// Returns the loop hint metadata node with the given name (for example,
// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
// returned.
-static MDNode *GetUnrollMetadataForLoop(const Loop *L, StringRef Name) {
+static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {
if (MDNode *LoopID = L->getLoopID())
return GetUnrollMetadata(LoopID, Name);
return nullptr;
}
// Returns true if the loop has an unroll(full) pragma.
-static bool HasUnrollFullPragma(const Loop *L) {
- return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
+static bool hasUnrollFullPragma(const Loop *L) {
+ return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
}
// Returns true if the loop has an unroll(enable) pragma. This metadata is used
// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
-static bool HasUnrollEnablePragma(const Loop *L) {
- return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
+static bool hasUnrollEnablePragma(const Loop *L) {
+ return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
}
// Returns true if the loop has an runtime unroll(disable) pragma.
-static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
- return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
+static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
+ return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
}
// If loop has an unroll_count pragma return the (necessarily
// positive) value from the pragma. Otherwise return 0.
-static unsigned UnrollCountPragmaValue(const Loop *L) {
- MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
+static unsigned unrollCountPragmaValue(const Loop *L) {
+ MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
if (MD) {
assert(MD->getNumOperands() == 2 &&
"Unroll count hint metadata should have two operands.");
@@ -740,7 +778,8 @@ bool llvm::computeUnrollCount(
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
+ TargetTransformInfo::UnrollingPreferences &UP,
+ TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
@@ -754,7 +793,7 @@ bool llvm::computeUnrollCount(
}
// 2nd priority is unroll count set by pragma.
- unsigned PragmaCount = UnrollCountPragmaValue(L);
+ unsigned PragmaCount = unrollCountPragmaValue(L);
if (PragmaCount > 0) {
UP.Count = PragmaCount;
UP.Runtime = true;
@@ -764,14 +803,14 @@ bool llvm::computeUnrollCount(
getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return true;
}
- bool PragmaFullUnroll = HasUnrollFullPragma(L);
+ bool PragmaFullUnroll = hasUnrollFullPragma(L);
if (PragmaFullUnroll && TripCount != 0) {
UP.Count = TripCount;
if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return false;
}
- bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
+ bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
@@ -827,7 +866,8 @@ bool llvm::computeUnrollCount(
// To check that, run additional analysis on the loop.
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
L, FullUnrollTripCount, DT, SE, EphValues, TTI,
- UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100,
+ UP.MaxIterationsCountToAnalyze)) {
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
@@ -841,8 +881,8 @@ bool llvm::computeUnrollCount(
}
// 4th priority is loop peeling.
- computePeelCount(L, LoopSize, UP, TripCount, SE);
- if (UP.PeelCount) {
+ computePeelCount(L, LoopSize, UP, PP, TripCount, SE);
+ if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
return ExplicitUnroll;
@@ -925,7 +965,7 @@ bool llvm::computeUnrollCount(
// 6th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
- if (HasRuntimeUnrollDisablePragma(L)) {
+ if (hasRuntimeUnrollDisablePragma(L)) {
UP.Count = 0;
return false;
}
@@ -1045,8 +1085,9 @@ static LoopUnrollResult tryToUnrollLoop(
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
- ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
ProvidedFullUnrollMaxCount);
+ TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
+ L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling);
// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
// as threshold later on.
@@ -1120,7 +1161,7 @@ static LoopUnrollResult tryToUnrollLoop(
bool UseUpperBound = false;
bool IsCountSetExplicitly = computeUnrollCount(
L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
- TripMultiple, LoopSize, UP, UseUpperBound);
+ TripMultiple, LoopSize, UP, PP, UseUpperBound);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
// Unroll factor (Count) must be less or equal to TripCount.
@@ -1135,9 +1176,9 @@ static LoopUnrollResult tryToUnrollLoop(
LoopUnrollResult UnrollResult = UnrollLoop(
L,
{UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
- UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+ UseUpperBound, MaxOrZero, TripMultiple, PP.PeelCount, UP.UnrollRemainder,
ForgetAllSCEV},
- LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
+ LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
@@ -1167,7 +1208,7 @@ static LoopUnrollResult tryToUnrollLoop(
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
- (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
+ (IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount)))
L->setLoopAlreadyUnrolled();
return UnrollResult;
@@ -1296,16 +1337,10 @@ Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &Updater) {
- const auto &FAM =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
- Function *F = L.getHeader()->getParent();
-
- auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
- // FIXME: This should probably be optional rather than required.
- if (!ORE)
- report_fatal_error(
- "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
- "cached at a higher level");
+ // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
+ // pass. Function analyses need to be preserved across loop transformations
+ // but ORE cannot be preserved (see comment before the pass definition).
+ OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
// Keep track of the previous loop structure so we can identify new loops
// created by unrolling.
@@ -1316,9 +1351,9 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
else
OldLoops.insert(AR.LI.begin(), AR.LI.end());
- std::string LoopName = L.getName();
+ std::string LoopName = std::string(L.getName());
- bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
+ bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
/*BFI*/ nullptr, /*PSI*/ nullptr,
/*PreserveLCSSA*/ true, OptLevel,
OnlyWhenForced, ForgetSCEV, /*Count*/ None,
@@ -1384,30 +1419,6 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
return getLoopPassPreservedAnalyses();
}
-template <typename RangeT>
-static SmallVector<Loop *, 8> appendLoopsToWorklist(RangeT &&Loops) {
- SmallVector<Loop *, 8> Worklist;
- // We use an internal worklist to build up the preorder traversal without
- // recursion.
- SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
-
- for (Loop *RootL : Loops) {
- assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
- assert(PreOrderWorklist.empty() &&
- "Must start with an empty preorder walk worklist.");
- PreOrderWorklist.push_back(RootL);
- do {
- Loop *L = PreOrderWorklist.pop_back_val();
- PreOrderWorklist.append(L->begin(), L->end());
- PreOrderLoops.push_back(L);
- } while (!PreOrderWorklist.empty());
-
- Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
- PreOrderLoops.clear();
- }
- return Worklist;
-}
-
PreservedAnalyses LoopUnrollPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
@@ -1421,10 +1432,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
LAM = &LAMProxy->getManager();
- const ModuleAnalysisManager &MAM =
- AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
ProfileSummaryInfo *PSI =
- MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
@@ -1441,7 +1451,10 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
}
- SmallVector<Loop *, 8> Worklist = appendLoopsToWorklist(LI);
+ // Add the loop nests in the reverse order of LoopInfo. See method
+ // declaration.
+ SmallPriorityWorklist<Loop *, 4> Worklist;
+ appendLoopsToWorklist(LI, Worklist);
while (!Worklist.empty()) {
// Because the LoopInfo stores the loops in RPO, we walk the worklist
@@ -1459,7 +1472,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
if (PSI && PSI->hasHugeWorkingSetSize())
LocalAllowPeeling = false;
- std::string LoopName = L.getName();
+ std::string LoopName = std::string(L.getName());
// The API here is quite complex to call and we allow to select some
// flavors of unrolling during construction time (by setting UnrollOpts).
LoopUnrollResult Result = tryToUnrollLoop(