aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/LoopUnrollPass.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2018-07-28 10:51:19 +0000
committerDimitry Andric <dim@FreeBSD.org>2018-07-28 10:51:19 +0000
commiteb11fae6d08f479c0799db45860a98af528fa6e7 (patch)
tree44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Transforms/Scalar/LoopUnrollPass.cpp
parentb8a2042aa938069e862750553db0e4d82d25822c (diff)
downloadsrc-eb11fae6d08f479c0799db45860a98af528fa6e7.tar.gz
src-eb11fae6d08f479c0799db45860a98af528fa6e7.zip
Notes
Diffstat (limited to 'lib/Transforms/Scalar/LoopUnrollPass.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp149
1 files changed, 83 insertions, 66 deletions
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 15e7da5e1a7a..634215c9770f 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -53,6 +53,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
@@ -164,7 +165,7 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
-static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
+TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
@@ -191,6 +192,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Force = false;
UP.UpperBound = false;
UP.AllowPeeling = true;
+ UP.UnrollAndJam = false;
+ UP.UnrollAndJamInnerLoopThreshold = 60;
// Override with any target specific settings
TTI.getUnrollingPreferences(L, SE, UP);
@@ -285,17 +288,17 @@ struct UnrolledInstStateKeyInfo {
};
struct EstimatedUnrollCost {
- /// \brief The estimated cost after unrolling.
+ /// The estimated cost after unrolling.
unsigned UnrolledCost;
- /// \brief The estimated dynamic cost of executing the instructions in the
+ /// The estimated dynamic cost of executing the instructions in the
/// rolled form.
unsigned RolledDynamicCost;
};
} // end anonymous namespace
-/// \brief Figure out if the loop is worth full unrolling.
+/// Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities. This routine
@@ -308,10 +311,10 @@ struct EstimatedUnrollCost {
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
-static Optional<EstimatedUnrollCost>
-analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
- ScalarEvolution &SE, const TargetTransformInfo &TTI,
- unsigned MaxUnrolledLoopSize) {
+static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
+ const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
+ const SmallPtrSetImpl<const Value *> &EphValues,
+ const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
// We want to be able to scale offsets by the trip count and add more offsets
// to them without checking for overflows, and we already don't want to
// analyze *massive* trip counts, so we force the max to be reasonably small.
@@ -405,9 +408,9 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// First accumulate the cost of this instruction.
if (!Cost.IsFree) {
UnrolledCost += TTI.getUserCost(I);
- DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
- << "): ");
- DEBUG(I->dump());
+ LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
+ << Iteration << "): ");
+ LLVM_DEBUG(I->dump());
}
// We must count the cost of every operand which is not free,
@@ -442,14 +445,14 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
assert(L->isLCSSAForm(DT) &&
"Must have loops in LCSSA form to track live-out values.");
- DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
+ LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
// Simulate execution of each iteration of the loop counting instructions,
// which would be simplified.
// Since the same load will take different values on different iterations,
// we literally have to go through all loop's iterations.
for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
- DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
+ LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
// Prepare for the iteration by collecting any simplified entry or backedge
// inputs.
@@ -490,7 +493,9 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// it. We don't change the actual IR, just count optimization
// opportunities.
for (Instruction &I : *BB) {
- if (isa<DbgInfoIntrinsic>(I))
+ // These won't get into the final code - don't even try calculating the
+ // cost for them.
+ if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
continue;
// Track this instruction's expected baseline cost when executing the
@@ -512,8 +517,13 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// Can't properly model a cost of a call.
// FIXME: With a proper cost model we should be able to do it.
- if(isa<CallInst>(&I))
- return None;
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ const Function *Callee = CI->getCalledFunction();
+ if (!Callee || TTI.isLoweredToCall(Callee)) {
+ LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
+ return None;
+ }
+ }
// If the instruction might have a side-effect recursively account for
// the cost of it and all the instructions leading up to it.
@@ -522,10 +532,10 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// If unrolled body turns out to be too big, bail out.
if (UnrolledCost > MaxUnrolledLoopSize) {
- DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
- << " UnrolledCost: " << UnrolledCost
- << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
- << "\n");
+ LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
+ << " UnrolledCost: " << UnrolledCost
+ << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
+ << "\n");
return None;
}
}
@@ -578,8 +588,8 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// If we found no optimization opportunities on the first iteration, we
// won't find them on later ones too.
if (UnrolledCost == RolledDynamicCost) {
- DEBUG(dbgs() << " No opportunities found.. exiting.\n"
- << " UnrolledCost: " << UnrolledCost << "\n");
+ LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
+ << " UnrolledCost: " << UnrolledCost << "\n");
return None;
}
}
@@ -600,20 +610,17 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
}
}
- DEBUG(dbgs() << "Analysis finished:\n"
- << "UnrolledCost: " << UnrolledCost << ", "
- << "RolledDynamicCost: " << RolledDynamicCost << "\n");
+ LLVM_DEBUG(dbgs() << "Analysis finished:\n"
+ << "UnrolledCost: " << UnrolledCost << ", "
+ << "RolledDynamicCost: " << RolledDynamicCost << "\n");
return {{UnrolledCost, RolledDynamicCost}};
}
/// ApproximateLoopSize - Approximate the size of the loop.
-static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
- bool &NotDuplicatable, bool &Convergent,
- const TargetTransformInfo &TTI,
- AssumptionCache *AC, unsigned BEInsns) {
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(L, AC, EphValues);
-
+unsigned llvm::ApproximateLoopSize(
+ const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
+ const TargetTransformInfo &TTI,
+ const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
CodeMetrics Metrics;
for (BasicBlock *BB : L->blocks())
Metrics.analyzeBasicBlock(BB, TTI, EphValues);
@@ -706,10 +713,11 @@ static uint64_t getUnrolledLoopSize(
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
-static bool computeUnrollCount(
+bool llvm::computeUnrollCount(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution &SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
- unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
+ ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
+ OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
+ unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
@@ -729,7 +737,7 @@ static bool computeUnrollCount(
UP.Runtime = true;
UP.AllowExpensiveTripCount = true;
UP.Force = true;
- if (UP.AllowRemainder &&
+ if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return true;
}
@@ -746,8 +754,8 @@ static bool computeUnrollCount(
if (ExplicitUnroll && TripCount != 0) {
// If the loop has an unrolling pragma, we want to be more aggressive with
- // unrolling limits. Set thresholds to at least the PragmaThreshold value
- // which is larger than the default limits.
+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
+ // value which is larger than the default limits.
UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
UP.PartialThreshold =
std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
@@ -763,7 +771,7 @@ static bool computeUnrollCount(
// compute the former when the latter is zero.
unsigned ExactTripCount = TripCount;
assert((ExactTripCount == 0 || MaxTripCount == 0) &&
- "ExtractTripCound and MaxTripCount cannot both be non zero.");
+ "ExtractTripCount and MaxTripCount cannot both be non zero.");
unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
UP.Count = FullUnrollTripCount;
if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
@@ -779,7 +787,7 @@ static bool computeUnrollCount(
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, SE, TTI,
+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,
UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
@@ -794,7 +802,7 @@ static bool computeUnrollCount(
}
// 4th priority is loop peeling
- computePeelCount(L, LoopSize, UP, TripCount);
+ computePeelCount(L, LoopSize, UP, TripCount, SE);
if (UP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
@@ -802,12 +810,12 @@ static bool computeUnrollCount(
}
// 5th priority is partial unrolling.
- // Try partial unroll only when TripCount could be staticaly calculated.
+ // Try partial unroll only when TripCount could be statically calculated.
if (TripCount) {
UP.Partial |= ExplicitUnroll;
if (!UP.Partial) {
- DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
+ LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
UP.Count = 0;
return false;
}
@@ -894,8 +902,9 @@ static bool computeUnrollCount(
// Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
- DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
- << "-unroll-runtime not given\n");
+ LLVM_DEBUG(
+ dbgs() << " will not try to unroll loop with runtime trip count "
+ << "-unroll-runtime not given\n");
UP.Count = 0;
return false;
}
@@ -915,12 +924,13 @@ static bool computeUnrollCount(
if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
while (UP.Count != 0 && TripMultiple % UP.Count != 0)
UP.Count >>= 1;
- DEBUG(dbgs() << "Remainder loop is restricted (that could architecture "
- "specific or because the loop contains a convergent "
- "instruction), so unroll count must divide the trip "
- "multiple, "
- << TripMultiple << ". Reducing unroll count from "
- << OrigCount << " to " << UP.Count << ".\n");
+ LLVM_DEBUG(
+ dbgs() << "Remainder loop is restricted (that could architecture "
+ "specific or because the loop contains a convergent "
+ "instruction), so unroll count must divide the trip "
+ "multiple, "
+ << TripMultiple << ". Reducing unroll count from " << OrigCount
+ << " to " << UP.Count << ".\n");
using namespace ore;
@@ -942,7 +952,8 @@ static bool computeUnrollCount(
if (UP.Count > UP.MaxCount)
UP.Count = UP.MaxCount;
- DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n");
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
+ << "\n");
if (UP.Count < 2)
UP.Count = 0;
return ExplicitUnroll;
@@ -955,12 +966,13 @@ static LoopUnrollResult tryToUnrollLoop(
Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold,
Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime,
Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) {
- DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
- << "] Loop %" << L->getHeader()->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Loop Unroll: F["
+ << L->getHeader()->getParent()->getName() << "] Loop %"
+ << L->getHeader()->getName() << "\n");
if (HasUnrollDisablePragma(L))
return LoopUnrollResult::Unmodified;
if (!L->isLoopSimplifyForm()) {
- DEBUG(
+ LLVM_DEBUG(
dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
return LoopUnrollResult::Unmodified;
}
@@ -975,16 +987,21 @@ static LoopUnrollResult tryToUnrollLoop(
// Exit early if unrolling is disabled.
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
return LoopUnrollResult::Unmodified;
- unsigned LoopSize = ApproximateLoopSize(
- L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns);
- DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
+
+ unsigned LoopSize =
+ ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
+ TTI, EphValues, UP.BEInsns);
+ LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (NotDuplicatable) {
- DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
- << " instructions.\n");
+ LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
+ << " instructions.\n");
return LoopUnrollResult::Unmodified;
}
if (NumInlineCandidates != 0) {
- DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
}
@@ -1030,7 +1047,7 @@ static LoopUnrollResult tryToUnrollLoop(
// loop tests remains the same compared to the non-unrolled version, whereas
// the generic upper bound unrolling keeps all but the last loop test so the
// number of loop tests goes up which may end up being worse on targets with
- // constriained branch predictor resources so is controlled by an option.)
+ // constrained branch predictor resources so is controlled by an option.)
// In addition we only unroll small upper bounds.
if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
MaxTripCount = 0;
@@ -1040,9 +1057,9 @@ static LoopUnrollResult tryToUnrollLoop(
// computeUnrollCount() decides whether it is beneficial to use upper bound to
// fully unroll the loop.
bool UseUpperBound = false;
- bool IsCountSetExplicitly =
- computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount,
- TripMultiple, LoopSize, UP, UseUpperBound);
+ bool IsCountSetExplicitly = computeUnrollCount(
+ L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
+ TripMultiple, LoopSize, UP, UseUpperBound);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
// Unroll factor (Count) must be less or equal to TripCount.