diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopUnrollPass.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopUnrollPass.cpp | 440 |
1 files changed, 284 insertions, 156 deletions
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 530a68424d5c..7b1d6446a24a 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1,4 +1,4 @@ -//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===// +//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,29 +13,55 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopUnrollPass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/LoopUnrollAnalyzer.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include <climits> +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <limits> +#include <string> +#include <tuple> #include <utility> using namespace llvm; @@ -79,6 +105,10 @@ static cl::opt<unsigned> UnrollFullMaxCount( cl::desc( "Set the max unroll count for full unrolling, for testing purposes")); +static cl::opt<unsigned> UnrollPeelCount( + "unroll-peel-count", cl::Hidden, + cl::desc("Set the unroll peeling count, for testing purposes")); + static cl::opt<bool> UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " @@ -114,6 +144,10 @@ static cl::opt<bool> cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low.")); +static cl::opt<bool> UnrollUnrollRemainder( + "unroll-remainder", cl::Hidden, + cl::desc("Allow the loop remainder to be unrolled.")); + // This option isn't ever intended to be enabled, it serves to allow // experiments to check the assumptions about when this kind of revisit is // necessary. @@ -126,7 +160,7 @@ static cl::opt<bool> UnrollRevisitChildLoops( /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much /// code expansion would result. -static const unsigned NoThreshold = UINT_MAX; +static const unsigned NoThreshold = std::numeric_limits<unsigned>::max(); /// Gather the various unrolling parameters based on the defaults, compiler /// flags, TTI overrides and user specified parameters. @@ -134,7 +168,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, - Optional<bool> UserUpperBound) { + Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) { TargetTransformInfo::UnrollingPreferences UP; // Set up the defaults @@ -146,12 +180,13 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( UP.Count = 0; UP.PeelCount = 0; UP.DefaultUnrollRuntimeCount = 8; - UP.MaxCount = UINT_MAX; - UP.FullUnrollMaxCount = UINT_MAX; + UP.MaxCount = std::numeric_limits<unsigned>::max(); + UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max(); UP.BEInsns = 2; UP.Partial = false; UP.Runtime = false; UP.AllowRemainder = true; + UP.UnrollRemainder = false; UP.AllowExpensiveTripCount = false; UP.Force = false; UP.UpperBound = false; @@ -177,6 +212,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( UP.MaxCount = UnrollMaxCount; if (UnrollFullMaxCount.getNumOccurrences() > 0) UP.FullUnrollMaxCount = UnrollFullMaxCount; + if (UnrollPeelCount.getNumOccurrences() > 0) + UP.PeelCount = UnrollPeelCount; if (UnrollAllowPartial.getNumOccurrences() > 0) UP.Partial = UnrollAllowPartial; if (UnrollAllowRemainder.getNumOccurrences() > 0) @@ -187,6 +224,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( UP.UpperBound = false; if (UnrollAllowPeeling.getNumOccurrences() > 0) UP.AllowPeeling = UnrollAllowPeeling; + if (UnrollUnrollRemainder.getNumOccurrences() > 0) + UP.UnrollRemainder = UnrollUnrollRemainder; // Apply user values provided by argument if (UserThreshold.hasValue()) { @@ -201,11 +240,14 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( UP.Runtime = *UserRuntime; if (UserUpperBound.hasValue()) UP.UpperBound = *UserUpperBound; + if (UserAllowPeeling.hasValue()) + UP.AllowPeeling = *UserAllowPeeling; return UP; } namespace { + /// A struct to densely store the state of an instruction after unrolling at /// each iteration. /// @@ -221,25 +263,27 @@ struct UnrolledInstState { /// Hashing and equality testing for a set of the instruction states. struct UnrolledInstStateKeyInfo { - typedef DenseMapInfo<Instruction *> PtrInfo; - typedef DenseMapInfo<std::pair<Instruction *, int>> PairInfo; + using PtrInfo = DenseMapInfo<Instruction *>; + using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>; + static inline UnrolledInstState getEmptyKey() { return {PtrInfo::getEmptyKey(), 0, 0, 0}; } + static inline UnrolledInstState getTombstoneKey() { return {PtrInfo::getTombstoneKey(), 0, 0, 0}; } + static inline unsigned getHashValue(const UnrolledInstState &S) { return PairInfo::getHashValue({S.I, S.Iteration}); } + static inline bool isEqual(const UnrolledInstState &LHS, const UnrolledInstState &RHS) { return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration}); } }; -} -namespace { struct EstimatedUnrollCost { /// \brief The estimated cost after unrolling. unsigned UnrolledCost; @@ -248,7 +292,8 @@ struct EstimatedUnrollCost { /// rolled form. unsigned RolledDynamicCost; }; -} + +} // end anonymous namespace /// \brief Figure out if the loop is worth full unrolling. /// @@ -270,7 +315,8 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. - assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) && + assert(UnrollMaxIterationsCountToAnalyze < + (unsigned)(std::numeric_limits<int>::max() / 2) && "The unroll iterations max is too large!"); // Only analyze inner loops. We can't properly estimate cost of nested loops @@ -633,43 +679,6 @@ static unsigned UnrollCountPragmaValue(const Loop *L) { return 0; } -// Remove existing unroll metadata and add unroll disable metadata to -// indicate the loop has already been unrolled. This prevents a loop -// from being unrolled more than is directed by a pragma if the loop -// unrolling pass is run more than once (which it generally is). -static void SetLoopAlreadyUnrolled(Loop *L) { - MDNode *LoopID = L->getLoopID(); - // First remove any existing loop unrolling metadata. - SmallVector<Metadata *, 4> MDs; - // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); - - if (LoopID) { - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - bool IsUnrollMetadata = false; - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); - if (MD) { - const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); - IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); - } - if (!IsUnrollMetadata) - MDs.push_back(LoopID->getOperand(i)); - } - } - - // Add unroll(disable) metadata to disable future unrolling. - LLVMContext &Context = L->getHeader()->getContext(); - SmallVector<Metadata *, 1> DisableOperands; - DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); - - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - L->setLoopID(NewLoopID); -} - // Computes the boosting factor for complete unrolling. // If fully unrolling the loop would save a lot of RolledDynamicCost, it would // be beneficial to fully unroll the loop even if unrolledcost is large. We @@ -677,7 +686,7 @@ static void SetLoopAlreadyUnrolled(Loop *L) { // the unroll threshold. static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost) { - if (Cost.RolledDynamicCost >= UINT_MAX / 100) + if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100) return 100; else if (Cost.UnrolledCost != 0) // The boosting factor is RolledDynamicCost / UnrolledCost @@ -826,11 +835,14 @@ static bool computeUnrollCount( } if (UP.Count < 2) { if (PragmaEnableUnroll) - ORE->emit( - OptimizationRemarkMissed(DEBUG_TYPE, "UnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "Unable to unroll loop as directed by unroll(enable) pragma " - "because unrolled size is too large."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "UnrollAsDirectedTooLarge", + L->getStartLoc(), L->getHeader()) + << "Unable to unroll loop as directed by unroll(enable) " + "pragma " + "because unrolled size is too large."; + }); UP.Count = 0; } } else { @@ -840,22 +852,27 @@ static bool computeUnrollCount( UP.Count = UP.MaxCount; if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && UP.Count != TripCount) - ORE->emit( - OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "Unable to fully unroll loop as directed by unroll pragma because " - "unrolled size is too large."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "FullUnrollAsDirectedTooLarge", + L->getStartLoc(), L->getHeader()) + << "Unable to fully unroll loop as directed by unroll pragma " + "because " + "unrolled size is too large."; + }); return ExplicitUnroll; } assert(TripCount == 0 && "All cases when TripCount is constant should be covered here."); if (PragmaFullUnroll) - ORE->emit( - OptimizationRemarkMissed(DEBUG_TYPE, - "CantFullUnrollAsDirectedRuntimeTripCount", - L->getStartLoc(), L->getHeader()) - << "Unable to fully unroll loop as directed by unroll(full) pragma " - "because loop has a runtime trip count."); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount", + L->getStartLoc(), L->getHeader()) + << "Unable to fully unroll loop as directed by unroll(full) " + "pragma " + "because loop has a runtime trip count."; + }); // 6th priority is runtime unrolling. // Don't unroll a runtime trip count loop when it is disabled. @@ -904,19 +921,23 @@ static bool computeUnrollCount( "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"); + using namespace ore; + if (PragmaCount > 0 && !UP.AllowRemainder) - ORE->emit( - OptimizationRemarkMissed(DEBUG_TYPE, - "DifferentUnrollCountFromDirected", - L->getStartLoc(), L->getHeader()) - << "Unable to unroll loop the number of times directed by " - "unroll_count pragma because remainder loop is restricted " - "(that could architecture specific or because the loop " - "contains a convergent instruction) and so must have an unroll " - "count that divides the loop trip multiple of " - << NV("TripMultiple", TripMultiple) << ". Unrolling instead " - << NV("UnrollCount", UP.Count) << " time(s)."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "DifferentUnrollCountFromDirected", + L->getStartLoc(), L->getHeader()) + << "Unable to unroll loop the number of times directed by " + "unroll_count pragma because remainder loop is restricted " + "(that could architecture specific or because the loop " + "contains a convergent instruction) and so must have an " + "unroll " + "count that divides the loop trip multiple of " + << NV("TripMultiple", TripMultiple) << ". Unrolling instead " + << NV("UnrollCount", UP.Count) << " time(s)."; + }); } if (UP.Count > UP.MaxCount) @@ -927,23 +948,21 @@ static bool computeUnrollCount( return ExplicitUnroll; } -static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution &SE, const TargetTransformInfo &TTI, - AssumptionCache &AC, OptimizationRemarkEmitter &ORE, - bool PreserveLCSSA, int OptLevel, - Optional<unsigned> ProvidedCount, - Optional<unsigned> ProvidedThreshold, - Optional<bool> ProvidedAllowPartial, - Optional<bool> ProvidedRuntime, - Optional<bool> ProvidedUpperBound) { +static LoopUnrollResult tryToUnrollLoop( + Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, + const TargetTransformInfo &TTI, AssumptionCache &AC, + OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, + Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold, + Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime, + Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) { DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); - if (HasUnrollDisablePragma(L)) - return false; - if (!L->isLoopSimplifyForm()) { + if (HasUnrollDisablePragma(L)) + return LoopUnrollResult::Unmodified; + if (!L->isLoopSimplifyForm()) { DEBUG( dbgs() << " Not unrolling loop which is not in loop-simplify form.\n"); - return false; + return LoopUnrollResult::Unmodified; } unsigned NumInlineCandidates; @@ -951,21 +970,22 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, bool Convergent; TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount, - ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound); + ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, + ProvidedAllowPeeling); // Exit early if unrolling is disabled. if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0)) - return false; + return LoopUnrollResult::Unmodified; unsigned LoopSize = ApproximateLoopSize( L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (NotDuplicatable) { DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"); - return false; + return LoopUnrollResult::Unmodified; } if (NumInlineCandidates != 0) { DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); - return false; + return LoopUnrollResult::Unmodified; } // Find trip count and trip multiple if count is not available @@ -1024,41 +1044,35 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount, TripMultiple, LoopSize, UP, UseUpperBound); if (!UP.Count) - return false; + return LoopUnrollResult::Unmodified; // Unroll factor (Count) must be less or equal to TripCount. if (TripCount && UP.Count > TripCount) UP.Count = TripCount; // Unroll the loop. - if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime, - UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero, - TripMultiple, UP.PeelCount, LI, &SE, &DT, &AC, &ORE, - PreserveLCSSA)) - return false; + LoopUnrollResult UnrollResult = UnrollLoop( + L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, + UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder, + LI, &SE, &DT, &AC, &ORE, PreserveLCSSA); + if (UnrollResult == LoopUnrollResult::Unmodified) + return LoopUnrollResult::Unmodified; // If loop has an unroll count pragma or unrolled by explicitly set count // mark loop as unrolled to prevent unrolling beyond that requested. // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. - if (IsCountSetExplicitly || UP.PeelCount) - SetLoopAlreadyUnrolled(L); + if (UnrollResult != LoopUnrollResult::FullyUnrolled && + (IsCountSetExplicitly || UP.PeelCount)) + L->setLoopAlreadyUnrolled(); - return true; + return UnrollResult; } namespace { + class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None, - Optional<unsigned> Count = None, - Optional<bool> AllowPartial = None, Optional<bool> Runtime = None, - Optional<bool> UpperBound = None) - : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)), - ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), - ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) { - initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); - } int OptLevel; Optional<unsigned> ProvidedCount; @@ -1066,8 +1080,21 @@ public: Optional<bool> ProvidedAllowPartial; Optional<bool> ProvidedRuntime; Optional<bool> ProvidedUpperBound; + Optional<bool> ProvidedAllowPeeling; - bool runOnLoop(Loop *L, LPPassManager &) override { + LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None, + Optional<unsigned> Count = None, + Optional<bool> AllowPartial = None, Optional<bool> Runtime = None, + Optional<bool> UpperBound = None, + Optional<bool> AllowPeeling = None) + : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)), + ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), + ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound), + ProvidedAllowPeeling(AllowPeeling) { + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override { if (skipLoop(L)) return false; @@ -1085,15 +1112,19 @@ public: OptimizationRemarkEmitter ORE(&F); bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, - ProvidedCount, ProvidedThreshold, - ProvidedAllowPartial, ProvidedRuntime, - ProvidedUpperBound); + LoopUnrollResult Result = tryToUnrollLoop( + L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount, + ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, + ProvidedUpperBound, ProvidedAllowPeeling); + + if (Result == LoopUnrollResult::FullyUnrolled) + LPM.markLoopAsDeleted(*L); + + return Result != LoopUnrollResult::Unmodified; } /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG... - /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); @@ -1102,9 +1133,11 @@ public: getLoopAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char LoopUnroll::ID = 0; + INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopPass) @@ -1112,8 +1145,8 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count, - int AllowPartial, int Runtime, - int UpperBound) { + int AllowPartial, int Runtime, int UpperBound, + int AllowPeeling) { // TODO: It would make more sense for this function to take the optionals // directly, but that's dangerous since it would silently break out of tree // callers. @@ -1122,16 +1155,17 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count, Count == -1 ? None : Optional<unsigned>(Count), AllowPartial == -1 ? None : Optional<bool>(AllowPartial), Runtime == -1 ? None : Optional<bool>(Runtime), - UpperBound == -1 ? None : Optional<bool>(UpperBound)); + UpperBound == -1 ? None : Optional<bool>(UpperBound), + AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling)); } Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) { - return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0); + return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0); } -PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &Updater) { +PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &Updater) { const auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); Function *F = L.getHeader()->getParent(); @@ -1139,8 +1173,9 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); // FIXME: This should probably be optional rather than required. if (!ORE) - report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not " - "cached at a higher level"); + report_fatal_error( + "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not " + "cached at a higher level"); // Keep track of the previous loop structure so we can identify new loops // created by unrolling. @@ -1151,17 +1186,14 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, else OldLoops.insert(AR.LI.begin(), AR.LI.end()); - // The API here is quite complex to call, but there are only two interesting - // states we support: partial and full (or "simple") unrolling. However, to - // enable these things we actually pass "None" in for the optional to avoid - // providing an explicit choice. - Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam; - if (!AllowPartialUnrolling) - AllowPartialParam = RuntimeParam = UpperBoundParam = false; - bool Changed = tryToUnrollLoop( - &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, - /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, - /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam); + std::string LoopName = L.getName(); + + bool Changed = + tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, + /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, + /*Threshold*/ None, /*AllowPartial*/ false, + /*Runtime*/ false, /*UpperBound*/ false, + /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified; if (!Changed) return PreservedAnalyses::all(); @@ -1172,17 +1204,13 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, #endif // Unrolling can do several things to introduce new loops into a loop nest: - // - Partial unrolling clones child loops within the current loop. If it - // uses a remainder, then it can also create any number of sibling loops. // - Full unrolling clones child loops within the current loop but then // removes the current loop making all of the children appear to be new // sibling loops. - // - Loop peeling can directly introduce new sibling loops by peeling one - // iteration. // - // When a new loop appears as a sibling loop, either from peeling an - // iteration or fully unrolling, its nesting structure has fundamentally - // changed and we want to revisit it to reflect that. + // When a new loop appears as a sibling loop after fully unrolling, + // its nesting structure has fundamentally changed and we want to revisit + // it to reflect that. // // When unrolling has removed the current loop, we need to tell the // infrastructure that it is gone. @@ -1209,13 +1237,11 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, Updater.addSiblingLoops(SibLoops); if (!IsCurrentLoopValid) { - Updater.markLoopAsDeleted(L); + Updater.markLoopAsDeleted(L, LoopName); } else { // We can only walk child loops if the current loop remained valid. if (UnrollRevisitChildLoops) { - // Walk *all* of the child loops. This is a highly speculative mode - // anyways so look for any simplifications that arose from partial - // unrolling or peeling off of iterations. + // Walk *all* of the child loops. SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end()); Updater.addChildLoops(ChildLoops); } @@ -1223,3 +1249,105 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, return getLoopPassPreservedAnalyses(); } + +template <typename RangeT> +static SmallVector<Loop *, 8> appendLoopsToWorklist(RangeT &&Loops) { + SmallVector<Loop *, 8> Worklist; + // We use an internal worklist to build up the preorder traversal without + // recursion. + SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist; + + for (Loop *RootL : Loops) { + assert(PreOrderLoops.empty() && "Must start with an empty preorder walk."); + assert(PreOrderWorklist.empty() && + "Must start with an empty preorder walk worklist."); + PreOrderWorklist.push_back(RootL); + do { + Loop *L = PreOrderWorklist.pop_back_val(); + PreOrderWorklist.append(L->begin(), L->end()); + PreOrderLoops.push_back(L); + } while (!PreOrderWorklist.empty()); + + Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end()); + PreOrderLoops.clear(); + } + return Worklist; +} + +PreservedAnalyses LoopUnrollPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); + + LoopAnalysisManager *LAM = nullptr; + if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F)) + LAM = &LAMProxy->getManager(); + + const ModuleAnalysisManager &MAM = + AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); + ProfileSummaryInfo *PSI = + MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + + bool Changed = false; + + // The unroller requires loops to be in simplified form, and also needs LCSSA. + // Since simplification may add new inner loops, it has to run before the + // legality and profitability checks. This means running the loop unroller + // will simplify all loops, regardless of whether anything end up being + // unrolled. + for (auto &L : LI) { + Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */); + Changed |= formLCSSARecursively(*L, DT, &LI, &SE); + } + + SmallVector<Loop *, 8> Worklist = appendLoopsToWorklist(LI); + + while (!Worklist.empty()) { + // Because the LoopInfo stores the loops in RPO, we walk the worklist + // from back to front so that we work forward across the CFG, which + // for unrolling is only needed to get optimization remarks emitted in + // a forward order. + Loop &L = *Worklist.pop_back_val(); +#ifndef NDEBUG + Loop *ParentL = L.getParentLoop(); +#endif + + // The API here is quite complex to call, but there are only two interesting + // states we support: partial and full (or "simple") unrolling. However, to + // enable these things we actually pass "None" in for the optional to avoid + // providing an explicit choice. + Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam, + AllowPeeling; + // Check if the profile summary indicates that the profiled application + // has a huge working set size, in which case we disable peeling to avoid + // bloating it further. + if (PSI && PSI->hasHugeWorkingSetSize()) + AllowPeeling = false; + std::string LoopName = L.getName(); + LoopUnrollResult Result = + tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE, + /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, + /*Threshold*/ None, AllowPartialParam, RuntimeParam, + UpperBoundParam, AllowPeeling); + Changed |= Result != LoopUnrollResult::Unmodified; + + // The parent must not be damaged by unrolling! +#ifndef NDEBUG + if (Result != LoopUnrollResult::Unmodified && ParentL) + ParentL->verifyLoop(); +#endif + + // Clear any cached analysis results for L if we removed it completely. + if (LAM && Result == LoopUnrollResult::FullyUnrolled) + LAM->clear(L, LoopName); + } + + if (!Changed) + return PreservedAnalyses::all(); + + return getLoopPassPreservedAnalyses(); +} |