diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopUnrollPass.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopUnrollPass.cpp | 172 |
1 files changed, 130 insertions, 42 deletions
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index c7f91226d222..62aa6ee48069 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -44,7 +44,11 @@ using namespace llvm; static cl::opt<unsigned> UnrollThreshold("unroll-threshold", cl::Hidden, - cl::desc("The baseline cost threshold for loop unrolling")); + cl::desc("The cost threshold for loop unrolling")); + +static cl::opt<unsigned> UnrollPartialThreshold( + "unroll-partial-threshold", cl::Hidden, + cl::desc("The cost threshold for partial loop unrolling")); static cl::opt<unsigned> UnrollMaxPercentThresholdBoost( "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, @@ -106,10 +110,19 @@ static cl::opt<unsigned> FlatLoopTripCountThreshold( "aggressively unrolled.")); static cl::opt<bool> - UnrollAllowPeeling("unroll-allow-peeling", cl::Hidden, + UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low.")); +// This option isn't ever intended to be enabled, it serves to allow +// experiments to check the assumptions about when this kind of revisit is +// necessary. +static cl::opt<bool> UnrollRevisitChildLoops( + "unroll-revisit-child-loops", cl::Hidden, + cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " + "This shouldn't typically be needed as child loops (or their " + "clones) were already visited.")); + /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much /// code expansion would result. @@ -118,16 +131,17 @@ static const unsigned NoThreshold = UINT_MAX; /// Gather the various unrolling parameters based on the defaults, compiler /// flags, TTI overrides and user specified parameters. static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( - Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold, - Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, - Optional<bool> UserRuntime, Optional<bool> UserUpperBound) { + Loop *L, const TargetTransformInfo &TTI, int OptLevel, + Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, + Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, + Optional<bool> UserUpperBound) { TargetTransformInfo::UnrollingPreferences UP; // Set up the defaults - UP.Threshold = 150; + UP.Threshold = OptLevel > 2 ? 300 : 150; UP.MaxPercentThresholdBoost = 400; UP.OptSizeThreshold = 0; - UP.PartialThreshold = UP.Threshold; + UP.PartialThreshold = 150; UP.PartialOptSizeThreshold = 0; UP.Count = 0; UP.PeelCount = 0; @@ -141,7 +155,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( UP.AllowExpensiveTripCount = false; UP.Force = false; UP.UpperBound = false; - UP.AllowPeeling = false; + UP.AllowPeeling = true; // Override with any target specific settings TTI.getUnrollingPreferences(L, UP); @@ -153,10 +167,10 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( } // Apply any user values specified by cl::opt - if (UnrollThreshold.getNumOccurrences() > 0) { + if (UnrollThreshold.getNumOccurrences() > 0) UP.Threshold = UnrollThreshold; - UP.PartialThreshold = UnrollThreshold; - } + if (UnrollPartialThreshold.getNumOccurrences() > 0) + UP.PartialThreshold = UnrollPartialThreshold; if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0) UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost; if (UnrollMaxCount.getNumOccurrences() > 0) @@ -495,7 +509,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, KnownSucc = SI->getSuccessor(0); else if (ConstantInt *SimpleCondVal = dyn_cast<ConstantInt>(SimpleCond)) - KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor(); + KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor(); } } if (KnownSucc) { @@ -770,7 +784,15 @@ static bool computeUnrollCount( } } - // 4rd priority is partial unrolling. + // 4th priority is loop peeling + computePeelCount(L, LoopSize, UP, TripCount); + if (UP.PeelCount) { + UP.Runtime = false; + UP.Count = 1; + return ExplicitUnroll; + } + + // 5th priority is partial unrolling. // Try partial unroll only when TripCount could be staticaly calculated. if (TripCount) { UP.Partial |= ExplicitUnroll; @@ -833,14 +855,6 @@ static bool computeUnrollCount( << "Unable to fully unroll loop as directed by unroll(full) pragma " "because loop has a runtime trip count."); - // 5th priority is loop peeling - computePeelCount(L, LoopSize, UP); - if (UP.PeelCount) { - UP.Runtime = false; - UP.Count = 1; - return ExplicitUnroll; - } - // 6th priority is runtime unrolling. // Don't unroll a runtime trip count loop when it is disabled. if (HasRuntimeUnrollDisablePragma(L)) { @@ -914,7 +928,7 @@ static bool computeUnrollCount( static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, - bool PreserveLCSSA, + bool PreserveLCSSA, int OptLevel, Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial, @@ -934,7 +948,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, bool NotDuplicatable; bool Convergent; TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, + L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound); // Exit early if unrolling is disabled. if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0)) @@ -1034,16 +1048,17 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll(Optional<unsigned> Threshold = None, + LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None, Optional<unsigned> Count = None, Optional<bool> AllowPartial = None, Optional<bool> Runtime = None, Optional<bool> UpperBound = None) - : LoopPass(ID), ProvidedCount(std::move(Count)), + : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) { initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } + int OptLevel; Optional<unsigned> ProvidedCount; Optional<unsigned> ProvidedThreshold; Optional<bool> ProvidedAllowPartial; @@ -1068,7 +1083,7 @@ public: OptimizationRemarkEmitter ORE(&F); bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, + return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound); @@ -1094,26 +1109,27 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, - int Runtime, int UpperBound) { +Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count, + int AllowPartial, int Runtime, + int UpperBound) { // TODO: It would make more sense for this function to take the optionals // directly, but that's dangerous since it would silently break out of tree // callers. - return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold), - Count == -1 ? None : Optional<unsigned>(Count), - AllowPartial == -1 ? None - : Optional<bool>(AllowPartial), - Runtime == -1 ? None : Optional<bool>(Runtime), - UpperBound == -1 ? None : Optional<bool>(UpperBound)); + return new LoopUnroll( + OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold), + Count == -1 ? None : Optional<unsigned>(Count), + AllowPartial == -1 ? None : Optional<bool>(AllowPartial), + Runtime == -1 ? None : Optional<bool>(Runtime), + UpperBound == -1 ? None : Optional<bool>(UpperBound)); } -Pass *llvm::createSimpleLoopUnrollPass() { - return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0); +Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) { + return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0); } PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, - LPMUpdater &) { + LPMUpdater &Updater) { const auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); Function *F = L.getHeader()->getParent(); @@ -1124,12 +1140,84 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not " "cached at a higher level"); - bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE, - /*PreserveLCSSA*/ true, ProvidedCount, - ProvidedThreshold, ProvidedAllowPartial, - ProvidedRuntime, ProvidedUpperBound); - + // Keep track of the previous loop structure so we can identify new loops + // created by unrolling. + Loop *ParentL = L.getParentLoop(); + SmallPtrSet<Loop *, 4> OldLoops; + if (ParentL) + OldLoops.insert(ParentL->begin(), ParentL->end()); + else + OldLoops.insert(AR.LI.begin(), AR.LI.end()); + + // The API here is quite complex to call, but there are only two interesting + // states we support: partial and full (or "simple") unrolling. However, to + // enable these things we actually pass "None" in for the optional to avoid + // providing an explicit choice. + Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam; + if (!AllowPartialUnrolling) + AllowPartialParam = RuntimeParam = UpperBoundParam = false; + bool Changed = tryToUnrollLoop( + &L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE, + /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, + /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam); if (!Changed) return PreservedAnalyses::all(); + + // The parent must not be damaged by unrolling! +#ifndef NDEBUG + if (ParentL) + ParentL->verifyLoop(); +#endif + + // Unrolling can do several things to introduce new loops into a loop nest: + // - Partial unrolling clones child loops within the current loop. If it + // uses a remainder, then it can also create any number of sibling loops. + // - Full unrolling clones child loops within the current loop but then + // removes the current loop making all of the children appear to be new + // sibling loops. + // - Loop peeling can directly introduce new sibling loops by peeling one + // iteration. + // + // When a new loop appears as a sibling loop, either from peeling an + // iteration or fully unrolling, its nesting structure has fundamentally + // changed and we want to revisit it to reflect that. + // + // When unrolling has removed the current loop, we need to tell the + // infrastructure that it is gone. + // + // Finally, we support a debugging/testing mode where we revisit child loops + // as well. These are not expected to require further optimizations as either + // they or the loop they were cloned from have been directly visited already. + // But the debugging mode allows us to check this assumption. + bool IsCurrentLoopValid = false; + SmallVector<Loop *, 4> SibLoops; + if (ParentL) + SibLoops.append(ParentL->begin(), ParentL->end()); + else + SibLoops.append(AR.LI.begin(), AR.LI.end()); + erase_if(SibLoops, [&](Loop *SibLoop) { + if (SibLoop == &L) { + IsCurrentLoopValid = true; + return true; + } + + // Otherwise erase the loop from the list if it was in the old loops. + return OldLoops.count(SibLoop) != 0; + }); + Updater.addSiblingLoops(SibLoops); + + if (!IsCurrentLoopValid) { + Updater.markLoopAsDeleted(L); + } else { + // We can only walk child loops if the current loop remained valid. + if (UnrollRevisitChildLoops) { + // Walk *all* of the child loops. This is a highly speculative mode + // anyways so look for any simplifications that arose from partial + // unrolling or peeling off of iterations. + SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end()); + Updater.addChildLoops(ChildLoops); + } + } + return getLoopPassPreservedAnalyses(); } |