summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/LoopUnrollPass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopUnrollPass.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp440
1 files changed, 284 insertions, 156 deletions
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 530a68424d5c..7b1d6446a24a 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1,4 +1,4 @@
-//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
+//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,29 +13,55 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include <climits>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <tuple>
#include <utility>
using namespace llvm;
@@ -79,6 +105,10 @@ static cl::opt<unsigned> UnrollFullMaxCount(
cl::desc(
"Set the max unroll count for full unrolling, for testing purposes"));
+static cl::opt<unsigned> UnrollPeelCount(
+ "unroll-peel-count", cl::Hidden,
+ cl::desc("Set the unroll peeling count, for testing purposes"));
+
static cl::opt<bool>
UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
@@ -114,6 +144,10 @@ static cl::opt<bool>
cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));
+static cl::opt<bool> UnrollUnrollRemainder(
+ "unroll-remainder", cl::Hidden,
+ cl::desc("Allow the loop remainder to be unrolled."));
+
// This option isn't ever intended to be enabled, it serves to allow
// experiments to check the assumptions about when this kind of revisit is
// necessary.
@@ -126,7 +160,7 @@ static cl::opt<bool> UnrollRevisitChildLoops(
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
-static const unsigned NoThreshold = UINT_MAX;
+static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
@@ -134,7 +168,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
- Optional<bool> UserUpperBound) {
+ Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
@@ -146,12 +180,13 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Count = 0;
UP.PeelCount = 0;
UP.DefaultUnrollRuntimeCount = 8;
- UP.MaxCount = UINT_MAX;
- UP.FullUnrollMaxCount = UINT_MAX;
+ UP.MaxCount = std::numeric_limits<unsigned>::max();
+ UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
UP.BEInsns = 2;
UP.Partial = false;
UP.Runtime = false;
UP.AllowRemainder = true;
+ UP.UnrollRemainder = false;
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
@@ -177,6 +212,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.MaxCount = UnrollMaxCount;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
UP.FullUnrollMaxCount = UnrollFullMaxCount;
+ if (UnrollPeelCount.getNumOccurrences() > 0)
+ UP.PeelCount = UnrollPeelCount;
if (UnrollAllowPartial.getNumOccurrences() > 0)
UP.Partial = UnrollAllowPartial;
if (UnrollAllowRemainder.getNumOccurrences() > 0)
@@ -187,6 +224,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.UpperBound = false;
if (UnrollAllowPeeling.getNumOccurrences() > 0)
UP.AllowPeeling = UnrollAllowPeeling;
+ if (UnrollUnrollRemainder.getNumOccurrences() > 0)
+ UP.UnrollRemainder = UnrollUnrollRemainder;
// Apply user values provided by argument
if (UserThreshold.hasValue()) {
@@ -201,11 +240,14 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Runtime = *UserRuntime;
if (UserUpperBound.hasValue())
UP.UpperBound = *UserUpperBound;
+ if (UserAllowPeeling.hasValue())
+ UP.AllowPeeling = *UserAllowPeeling;
return UP;
}
namespace {
+
/// A struct to densely store the state of an instruction after unrolling at
/// each iteration.
///
@@ -221,25 +263,27 @@ struct UnrolledInstState {
/// Hashing and equality testing for a set of the instruction states.
struct UnrolledInstStateKeyInfo {
- typedef DenseMapInfo<Instruction *> PtrInfo;
- typedef DenseMapInfo<std::pair<Instruction *, int>> PairInfo;
+ using PtrInfo = DenseMapInfo<Instruction *>;
+ using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;
+
static inline UnrolledInstState getEmptyKey() {
return {PtrInfo::getEmptyKey(), 0, 0, 0};
}
+
static inline UnrolledInstState getTombstoneKey() {
return {PtrInfo::getTombstoneKey(), 0, 0, 0};
}
+
static inline unsigned getHashValue(const UnrolledInstState &S) {
return PairInfo::getHashValue({S.I, S.Iteration});
}
+
static inline bool isEqual(const UnrolledInstState &LHS,
const UnrolledInstState &RHS) {
return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
}
};
-}
-namespace {
struct EstimatedUnrollCost {
/// \brief The estimated cost after unrolling.
unsigned UnrolledCost;
@@ -248,7 +292,8 @@ struct EstimatedUnrollCost {
/// rolled form.
unsigned RolledDynamicCost;
};
-}
+
+} // end anonymous namespace
/// \brief Figure out if the loop is worth full unrolling.
///
@@ -270,7 +315,8 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// We want to be able to scale offsets by the trip count and add more offsets
// to them without checking for overflows, and we already don't want to
// analyze *massive* trip counts, so we force the max to be reasonably small.
- assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
+ assert(UnrollMaxIterationsCountToAnalyze <
+ (unsigned)(std::numeric_limits<int>::max() / 2) &&
"The unroll iterations max is too large!");
// Only analyze inner loops. We can't properly estimate cost of nested loops
@@ -633,43 +679,6 @@ static unsigned UnrollCountPragmaValue(const Loop *L) {
return 0;
}
-// Remove existing unroll metadata and add unroll disable metadata to
-// indicate the loop has already been unrolled. This prevents a loop
-// from being unrolled more than is directed by a pragma if the loop
-// unrolling pass is run more than once (which it generally is).
-static void SetLoopAlreadyUnrolled(Loop *L) {
- MDNode *LoopID = L->getLoopID();
- // First remove any existing loop unrolling metadata.
- SmallVector<Metadata *, 4> MDs;
- // Reserve first location for self reference to the LoopID metadata node.
- MDs.push_back(nullptr);
-
- if (LoopID) {
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- bool IsUnrollMetadata = false;
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (MD) {
- const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
- }
- if (!IsUnrollMetadata)
- MDs.push_back(LoopID->getOperand(i));
- }
- }
-
- // Add unroll(disable) metadata to disable future unrolling.
- LLVMContext &Context = L->getHeader()->getContext();
- SmallVector<Metadata *, 1> DisableOperands;
- DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
- MDNode *DisableNode = MDNode::get(Context, DisableOperands);
- MDs.push_back(DisableNode);
-
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
- L->setLoopID(NewLoopID);
-}
-
// Computes the boosting factor for complete unrolling.
// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
// be beneficial to fully unroll the loop even if unrolledcost is large. We
@@ -677,7 +686,7 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
// the unroll threshold.
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
unsigned MaxPercentThresholdBoost) {
- if (Cost.RolledDynamicCost >= UINT_MAX / 100)
+ if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
return 100;
else if (Cost.UnrolledCost != 0)
// The boosting factor is RolledDynamicCost / UnrolledCost
@@ -826,11 +835,14 @@ static bool computeUnrollCount(
}
if (UP.Count < 2) {
if (PragmaEnableUnroll)
- ORE->emit(
- OptimizationRemarkMissed(DEBUG_TYPE, "UnrollAsDirectedTooLarge",
- L->getStartLoc(), L->getHeader())
- << "Unable to unroll loop as directed by unroll(enable) pragma "
- "because unrolled size is too large.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "UnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to unroll loop as directed by unroll(enable) "
+ "pragma "
+ "because unrolled size is too large.";
+ });
UP.Count = 0;
}
} else {
@@ -840,22 +852,27 @@ static bool computeUnrollCount(
UP.Count = UP.MaxCount;
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
UP.Count != TripCount)
- ORE->emit(
- OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedTooLarge",
- L->getStartLoc(), L->getHeader())
- << "Unable to fully unroll loop as directed by unroll pragma because "
- "unrolled size is too large.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "FullUnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll pragma "
+ "because "
+ "unrolled size is too large.";
+ });
return ExplicitUnroll;
}
assert(TripCount == 0 &&
"All cases when TripCount is constant should be covered here.");
if (PragmaFullUnroll)
- ORE->emit(
- OptimizationRemarkMissed(DEBUG_TYPE,
- "CantFullUnrollAsDirectedRuntimeTripCount",
- L->getStartLoc(), L->getHeader())
- << "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because loop has a runtime trip count.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(
+ DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll(full) "
+ "pragma "
+ "because loop has a runtime trip count.";
+ });
// 6th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
@@ -904,19 +921,23 @@ static bool computeUnrollCount(
"multiple, "
<< TripMultiple << ". Reducing unroll count from "
<< OrigCount << " to " << UP.Count << ".\n");
+
using namespace ore;
+
if (PragmaCount > 0 && !UP.AllowRemainder)
- ORE->emit(
- OptimizationRemarkMissed(DEBUG_TYPE,
- "DifferentUnrollCountFromDirected",
- L->getStartLoc(), L->getHeader())
- << "Unable to unroll loop the number of times directed by "
- "unroll_count pragma because remainder loop is restricted "
- "(that could architecture specific or because the loop "
- "contains a convergent instruction) and so must have an unroll "
- "count that divides the loop trip multiple of "
- << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
- << NV("UnrollCount", UP.Count) << " time(s).");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "DifferentUnrollCountFromDirected",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to unroll loop the number of times directed by "
+ "unroll_count pragma because remainder loop is restricted "
+ "(that could architecture specific or because the loop "
+ "contains a convergent instruction) and so must have an "
+ "unroll "
+ "count that divides the loop trip multiple of "
+ << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
+ << NV("UnrollCount", UP.Count) << " time(s).";
+ });
}
if (UP.Count > UP.MaxCount)
@@ -927,23 +948,21 @@ static bool computeUnrollCount(
return ExplicitUnroll;
}
-static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution &SE, const TargetTransformInfo &TTI,
- AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
- bool PreserveLCSSA, int OptLevel,
- Optional<unsigned> ProvidedCount,
- Optional<unsigned> ProvidedThreshold,
- Optional<bool> ProvidedAllowPartial,
- Optional<bool> ProvidedRuntime,
- Optional<bool> ProvidedUpperBound) {
+static LoopUnrollResult tryToUnrollLoop(
+ Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI, AssumptionCache &AC,
+ OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
+ Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold,
+ Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime,
+ Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) {
DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
<< "] Loop %" << L->getHeader()->getName() << "\n");
- if (HasUnrollDisablePragma(L))
- return false;
- if (!L->isLoopSimplifyForm()) {
+ if (HasUnrollDisablePragma(L))
+ return LoopUnrollResult::Unmodified;
+ if (!L->isLoopSimplifyForm()) {
DEBUG(
dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
unsigned NumInlineCandidates;
@@ -951,21 +970,22 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
- ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound);
+ ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
+ ProvidedAllowPeeling);
// Exit early if unrolling is disabled.
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
- return false;
+ return LoopUnrollResult::Unmodified;
unsigned LoopSize = ApproximateLoopSize(
L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (NotDuplicatable) {
DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
<< " instructions.\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
if (NumInlineCandidates != 0) {
DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
// Find trip count and trip multiple if count is not available
@@ -1024,41 +1044,35 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount,
TripMultiple, LoopSize, UP, UseUpperBound);
if (!UP.Count)
- return false;
+ return LoopUnrollResult::Unmodified;
// Unroll factor (Count) must be less or equal to TripCount.
if (TripCount && UP.Count > TripCount)
UP.Count = TripCount;
// Unroll the loop.
- if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
- UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
- TripMultiple, UP.PeelCount, LI, &SE, &DT, &AC, &ORE,
- PreserveLCSSA))
- return false;
+ LoopUnrollResult UnrollResult = UnrollLoop(
+ L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
+ UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+ LI, &SE, &DT, &AC, &ORE, PreserveLCSSA);
+ if (UnrollResult == LoopUnrollResult::Unmodified)
+ return LoopUnrollResult::Unmodified;
// If loop has an unroll count pragma or unrolled by explicitly set count
// mark loop as unrolled to prevent unrolling beyond that requested.
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
- if (IsCountSetExplicitly || UP.PeelCount)
- SetLoopAlreadyUnrolled(L);
+ if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
+ (IsCountSetExplicitly || UP.PeelCount))
+ L->setLoopAlreadyUnrolled();
- return true;
+ return UnrollResult;
}
namespace {
+
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
- Optional<unsigned> Count = None,
- Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
- Optional<bool> UpperBound = None)
- : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
- ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
- ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
- initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
- }
int OptLevel;
Optional<unsigned> ProvidedCount;
@@ -1066,8 +1080,21 @@ public:
Optional<bool> ProvidedAllowPartial;
Optional<bool> ProvidedRuntime;
Optional<bool> ProvidedUpperBound;
+ Optional<bool> ProvidedAllowPeeling;
- bool runOnLoop(Loop *L, LPPassManager &) override {
+ LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
+ Optional<unsigned> Count = None,
+ Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
+ Optional<bool> UpperBound = None,
+ Optional<bool> AllowPeeling = None)
+ : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
+ ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
+ ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
+ ProvidedAllowPeeling(AllowPeeling) {
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (skipLoop(L))
return false;
@@ -1085,15 +1112,19 @@ public:
OptimizationRemarkEmitter ORE(&F);
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel,
- ProvidedCount, ProvidedThreshold,
- ProvidedAllowPartial, ProvidedRuntime,
- ProvidedUpperBound);
+ LoopUnrollResult Result = tryToUnrollLoop(
+ L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount,
+ ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
+ ProvidedUpperBound, ProvidedAllowPeeling);
+
+ if (Result == LoopUnrollResult::FullyUnrolled)
+ LPM.markLoopAsDeleted(*L);
+
+ return Result != LoopUnrollResult::Unmodified;
}
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
- ///
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
@@ -1102,9 +1133,11 @@ public:
getLoopAnalysisUsage(AU);
}
};
-}
+
+} // end anonymous namespace
char LoopUnroll::ID = 0;
+
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
@@ -1112,8 +1145,8 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
- int AllowPartial, int Runtime,
- int UpperBound) {
+ int AllowPartial, int Runtime, int UpperBound,
+ int AllowPeeling) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
@@ -1122,16 +1155,17 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
Count == -1 ? None : Optional<unsigned>(Count),
AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
Runtime == -1 ? None : Optional<bool>(Runtime),
- UpperBound == -1 ? None : Optional<bool>(UpperBound));
+ UpperBound == -1 ? None : Optional<bool>(UpperBound),
+ AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
}
Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
- return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0);
+ return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0);
}
-PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &Updater) {
+PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &Updater) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
@@ -1139,8 +1173,9 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
// FIXME: This should probably be optional rather than required.
if (!ORE)
- report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
- "cached at a higher level");
+ report_fatal_error(
+ "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
+ "cached at a higher level");
// Keep track of the previous loop structure so we can identify new loops
// created by unrolling.
@@ -1151,17 +1186,14 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
else
OldLoops.insert(AR.LI.begin(), AR.LI.end());
- // The API here is quite complex to call, but there are only two interesting
- // states we support: partial and full (or "simple") unrolling. However, to
- // enable these things we actually pass "None" in for the optional to avoid
- // providing an explicit choice.
- Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam;
- if (!AllowPartialUnrolling)
- AllowPartialParam = RuntimeParam = UpperBoundParam = false;
- bool Changed = tryToUnrollLoop(
- &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
- /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
- /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
+ std::string LoopName = L.getName();
+
+ bool Changed =
+ tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
+ /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
+ /*Threshold*/ None, /*AllowPartial*/ false,
+ /*Runtime*/ false, /*UpperBound*/ false,
+ /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
if (!Changed)
return PreservedAnalyses::all();
@@ -1172,17 +1204,13 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
#endif
// Unrolling can do several things to introduce new loops into a loop nest:
- // - Partial unrolling clones child loops within the current loop. If it
- // uses a remainder, then it can also create any number of sibling loops.
// - Full unrolling clones child loops within the current loop but then
// removes the current loop making all of the children appear to be new
// sibling loops.
- // - Loop peeling can directly introduce new sibling loops by peeling one
- // iteration.
//
- // When a new loop appears as a sibling loop, either from peeling an
- // iteration or fully unrolling, its nesting structure has fundamentally
- // changed and we want to revisit it to reflect that.
+ // When a new loop appears as a sibling loop after fully unrolling,
+ // its nesting structure has fundamentally changed and we want to revisit
+ // it to reflect that.
//
// When unrolling has removed the current loop, we need to tell the
// infrastructure that it is gone.
@@ -1209,13 +1237,11 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
Updater.addSiblingLoops(SibLoops);
if (!IsCurrentLoopValid) {
- Updater.markLoopAsDeleted(L);
+ Updater.markLoopAsDeleted(L, LoopName);
} else {
// We can only walk child loops if the current loop remained valid.
if (UnrollRevisitChildLoops) {
- // Walk *all* of the child loops. This is a highly speculative mode
- // anyways so look for any simplifications that arose from partial
- // unrolling or peeling off of iterations.
+ // Walk *all* of the child loops.
SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
Updater.addChildLoops(ChildLoops);
}
@@ -1223,3 +1249,105 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
return getLoopPassPreservedAnalyses();
}
+
+template <typename RangeT>
+static SmallVector<Loop *, 8> appendLoopsToWorklist(RangeT &&Loops) {
+ SmallVector<Loop *, 8> Worklist;
+ // We use an internal worklist to build up the preorder traversal without
+ // recursion.
+ SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
+
+ for (Loop *RootL : Loops) {
+ assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
+ assert(PreOrderWorklist.empty() &&
+ "Must start with an empty preorder walk worklist.");
+ PreOrderWorklist.push_back(RootL);
+ do {
+ Loop *L = PreOrderWorklist.pop_back_val();
+ PreOrderWorklist.append(L->begin(), L->end());
+ PreOrderLoops.push_back(L);
+ } while (!PreOrderWorklist.empty());
+
+ Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
+ PreOrderLoops.clear();
+ }
+ return Worklist;
+}
+
+PreservedAnalyses LoopUnrollPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ LoopAnalysisManager *LAM = nullptr;
+ if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
+ LAM = &LAMProxy->getManager();
+
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ ProfileSummaryInfo *PSI =
+ MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+
+ bool Changed = false;
+
+ // The unroller requires loops to be in simplified form, and also needs LCSSA.
+ // Since simplification may add new inner loops, it has to run before the
+ // legality and profitability checks. This means running the loop unroller
+ // will simplify all loops, regardless of whether anything end up being
+ // unrolled.
+ for (auto &L : LI) {
+ Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
+ Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
+ }
+
+ SmallVector<Loop *, 8> Worklist = appendLoopsToWorklist(LI);
+
+ while (!Worklist.empty()) {
+ // Because the LoopInfo stores the loops in RPO, we walk the worklist
+ // from back to front so that we work forward across the CFG, which
+ // for unrolling is only needed to get optimization remarks emitted in
+ // a forward order.
+ Loop &L = *Worklist.pop_back_val();
+#ifndef NDEBUG
+ Loop *ParentL = L.getParentLoop();
+#endif
+
+ // The API here is quite complex to call, but there are only two interesting
+ // states we support: partial and full (or "simple") unrolling. However, to
+ // enable these things we actually pass "None" in for the optional to avoid
+ // providing an explicit choice.
+ Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
+ AllowPeeling;
+ // Check if the profile summary indicates that the profiled application
+ // has a huge working set size, in which case we disable peeling to avoid
+ // bloating it further.
+ if (PSI && PSI->hasHugeWorkingSetSize())
+ AllowPeeling = false;
+ std::string LoopName = L.getName();
+ LoopUnrollResult Result =
+ tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
+ /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
+ /*Threshold*/ None, AllowPartialParam, RuntimeParam,
+ UpperBoundParam, AllowPeeling);
+ Changed |= Result != LoopUnrollResult::Unmodified;
+
+ // The parent must not be damaged by unrolling!
+#ifndef NDEBUG
+ if (Result != LoopUnrollResult::Unmodified && ParentL)
+ ParentL->verifyLoop();
+#endif
+
+ // Clear any cached analysis results for L if we removed it completely.
+ if (LAM && Result == LoopUnrollResult::FullyUnrolled)
+ LAM->clear(L, LoopName);
+ }
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}