diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | |
parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) |
Notes
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 181 |
1 files changed, 97 insertions, 84 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 4c2b079c6bb5b..87f40bb7ba852 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -154,6 +154,10 @@ static cl::opt<bool> cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low.")); +static cl::opt<bool> UnrollAllowLoopNestsPeeling( + "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, + cl::desc("Allows loop nests to be peeled.")); + static cl::opt<bool> UnrollUnrollRemainder( "unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled.")); @@ -167,6 +171,16 @@ static cl::opt<bool> UnrollRevisitChildLoops( "This shouldn't typically be needed as child loops (or their " "clones) were already visited.")); +static cl::opt<unsigned> UnrollThresholdAggressive( + "unroll-threshold-aggressive", cl::init(300), cl::Hidden, + cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " + "optimizations")); +static cl::opt<unsigned> + UnrollThresholdDefault("unroll-threshold-default", cl::init(150), + cl::Hidden, + cl::desc("Default threshold (max size of unrolled " + "loop), used in all but O3 optimizations")); + /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much /// code expansion would result. @@ -179,19 +193,17 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, - Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling, - Optional<bool> UserAllowProfileBasedPeeling, - Optional<unsigned> UserFullUnrollMaxCount) { + Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) { TargetTransformInfo::UnrollingPreferences UP; // Set up the defaults - UP.Threshold = OptLevel > 2 ? 300 : 150; + UP.Threshold = + OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault; UP.MaxPercentThresholdBoost = 400; UP.OptSizeThreshold = 0; UP.PartialThreshold = 150; UP.PartialOptSizeThreshold = 0; UP.Count = 0; - UP.PeelCount = 0; UP.DefaultUnrollRuntimeCount = 8; UP.MaxCount = std::numeric_limits<unsigned>::max(); UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max(); @@ -203,10 +215,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.AllowExpensiveTripCount = false; UP.Force = false; UP.UpperBound = false; - UP.AllowPeeling = true; UP.UnrollAndJam = false; - UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; + UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; // Override with any target specific settings TTI.getUnrollingPreferences(L, SE, UP); @@ -232,8 +243,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.MaxCount = UnrollMaxCount; if (UnrollFullMaxCount.getNumOccurrences() > 0) UP.FullUnrollMaxCount = UnrollFullMaxCount; - if (UnrollPeelCount.getNumOccurrences() > 0) - UP.PeelCount = UnrollPeelCount; if (UnrollAllowPartial.getNumOccurrences() > 0) UP.Partial = UnrollAllowPartial; if (UnrollAllowRemainder.getNumOccurrences() > 0) @@ -242,10 +251,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.Runtime = UnrollRuntime; if (UnrollMaxUpperBound == 0) UP.UpperBound = false; - if (UnrollAllowPeeling.getNumOccurrences() > 0) - UP.AllowPeeling = UnrollAllowPeeling; if (UnrollUnrollRemainder.getNumOccurrences() > 0) UP.UnrollRemainder = UnrollUnrollRemainder; + if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0) + UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; // Apply user values provided by argument if (UserThreshold.hasValue()) { @@ -260,16 +269,45 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.Runtime = *UserRuntime; if (UserUpperBound.hasValue()) UP.UpperBound = *UserUpperBound; - if (UserAllowPeeling.hasValue()) - UP.AllowPeeling = *UserAllowPeeling; - if (UserAllowProfileBasedPeeling.hasValue()) - UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; if (UserFullUnrollMaxCount.hasValue()) UP.FullUnrollMaxCount = *UserFullUnrollMaxCount; return UP; } +TargetTransformInfo::PeelingPreferences +llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, + const TargetTransformInfo &TTI, + Optional<bool> UserAllowPeeling, + Optional<bool> UserAllowProfileBasedPeeling) { + TargetTransformInfo::PeelingPreferences PP; + + // Default values + PP.PeelCount = 0; + PP.AllowPeeling = true; + PP.AllowLoopNestsPeeling = false; + PP.PeelProfiledIterations = true; + + // Get Target Specifc Values + TTI.getPeelingPreferences(L, SE, PP); + + // User Specified Values using cl::opt + if (UnrollPeelCount.getNumOccurrences() > 0) + PP.PeelCount = UnrollPeelCount; + if (UnrollAllowPeeling.getNumOccurrences() > 0) + PP.AllowPeeling = UnrollAllowPeeling; + if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) + PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; + + // User Specifed values provided by argument + if (UserAllowPeeling.hasValue()) + PP.AllowPeeling = *UserAllowPeeling; + if (UserAllowProfileBasedPeeling.hasValue()) + PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; + + return PP; +} + namespace { /// A struct to densely store the state of an instruction after unrolling at @@ -335,11 +373,12 @@ struct EstimatedUnrollCost { static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, - const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) { + const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, + unsigned MaxIterationsCountToAnalyze) { // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. - assert(UnrollMaxIterationsCountToAnalyze < + assert(MaxIterationsCountToAnalyze < (unsigned)(std::numeric_limits<int>::max() / 2) && "The unroll iterations max is too large!"); @@ -349,8 +388,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( return None; // Don't simulate loops with a big or unknown tripcount - if (!UnrollMaxIterationsCountToAnalyze || !TripCount || - TripCount > UnrollMaxIterationsCountToAnalyze) + if (!TripCount || TripCount > MaxIterationsCountToAnalyze) return None; SmallSetVector<BasicBlock *, 16> BBWorklist; @@ -428,7 +466,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( // First accumulate the cost of this instruction. if (!Cost.IsFree) { - UnrolledCost += TTI.getUserCost(I); + UnrolledCost += TTI.getUserCost(I, TargetTransformInfo::TCK_CodeSize); LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "); LLVM_DEBUG(I->dump()); @@ -521,7 +559,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( // Track this instruction's expected baseline cost when executing the // rolled loop form. - RolledDynamicCost += TTI.getUserCost(&I); + RolledDynamicCost += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns true, mark the instruction as free after @@ -665,32 +703,32 @@ unsigned llvm::ApproximateLoopSize( // Returns the loop hint metadata node with the given name (for example, // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is // returned. -static MDNode *GetUnrollMetadataForLoop(const Loop *L, StringRef Name) { +static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) { if (MDNode *LoopID = L->getLoopID()) return GetUnrollMetadata(LoopID, Name); return nullptr; } // Returns true if the loop has an unroll(full) pragma. -static bool HasUnrollFullPragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full"); +static bool hasUnrollFullPragma(const Loop *L) { + return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full"); } // Returns true if the loop has an unroll(enable) pragma. This metadata is used // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives. -static bool HasUnrollEnablePragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); +static bool hasUnrollEnablePragma(const Loop *L) { + return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); } // Returns true if the loop has an runtime unroll(disable) pragma. -static bool HasRuntimeUnrollDisablePragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); +static bool hasRuntimeUnrollDisablePragma(const Loop *L) { + return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); } // If loop has an unroll_count pragma return the (necessarily // positive) value from the pragma. Otherwise return 0. -static unsigned UnrollCountPragmaValue(const Loop *L) { - MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count"); +static unsigned unrollCountPragmaValue(const Loop *L) { + MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count"); if (MD) { assert(MD->getNumOperands() == 2 && "Unroll count hint metadata should have two operands."); @@ -740,7 +778,8 @@ bool llvm::computeUnrollCount( ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { + TargetTransformInfo::UnrollingPreferences &UP, + TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. @@ -754,7 +793,7 @@ bool llvm::computeUnrollCount( } // 2nd priority is unroll count set by pragma. - unsigned PragmaCount = UnrollCountPragmaValue(L); + unsigned PragmaCount = unrollCountPragmaValue(L); if (PragmaCount > 0) { UP.Count = PragmaCount; UP.Runtime = true; @@ -764,14 +803,14 @@ bool llvm::computeUnrollCount( getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold) return true; } - bool PragmaFullUnroll = HasUnrollFullPragma(L); + bool PragmaFullUnroll = hasUnrollFullPragma(L); if (PragmaFullUnroll && TripCount != 0) { UP.Count = TripCount; if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold) return false; } - bool PragmaEnableUnroll = HasUnrollEnablePragma(L); + bool PragmaEnableUnroll = hasUnrollEnablePragma(L); bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || PragmaEnableUnroll || UserUnrollCount; @@ -827,7 +866,8 @@ bool llvm::computeUnrollCount( // To check that, run additional analysis on the loop. if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( L, FullUnrollTripCount, DT, SE, EphValues, TTI, - UP.Threshold * UP.MaxPercentThresholdBoost / 100)) { + UP.Threshold * UP.MaxPercentThresholdBoost / 100, + UP.MaxIterationsCountToAnalyze)) { unsigned Boost = getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { @@ -841,8 +881,8 @@ bool llvm::computeUnrollCount( } // 4th priority is loop peeling. - computePeelCount(L, LoopSize, UP, TripCount, SE); - if (UP.PeelCount) { + computePeelCount(L, LoopSize, UP, PP, TripCount, SE); + if (PP.PeelCount) { UP.Runtime = false; UP.Count = 1; return ExplicitUnroll; @@ -925,7 +965,7 @@ bool llvm::computeUnrollCount( // 6th priority is runtime unrolling. // Don't unroll a runtime trip count loop when it is disabled. - if (HasRuntimeUnrollDisablePragma(L)) { + if (hasRuntimeUnrollDisablePragma(L)) { UP.Count = 0; return false; } @@ -1045,8 +1085,9 @@ static LoopUnrollResult tryToUnrollLoop( TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, - ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount); + TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences( + L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling); // Exit early if unrolling is disabled. For OptForSize, we pick the loop size // as threshold later on. @@ -1120,7 +1161,7 @@ static LoopUnrollResult tryToUnrollLoop( bool UseUpperBound = false; bool IsCountSetExplicitly = computeUnrollCount( L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, - TripMultiple, LoopSize, UP, UseUpperBound); + TripMultiple, LoopSize, UP, PP, UseUpperBound); if (!UP.Count) return LoopUnrollResult::Unmodified; // Unroll factor (Count) must be less or equal to TripCount. @@ -1135,9 +1176,9 @@ static LoopUnrollResult tryToUnrollLoop( LoopUnrollResult UnrollResult = UnrollLoop( L, {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, - UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder, + UseUpperBound, MaxOrZero, TripMultiple, PP.PeelCount, UP.UnrollRemainder, ForgetAllSCEV}, - LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop); + LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); if (UnrollResult == LoopUnrollResult::Unmodified) return LoopUnrollResult::Unmodified; @@ -1167,7 +1208,7 @@ static LoopUnrollResult tryToUnrollLoop( // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. if (UnrollResult != LoopUnrollResult::FullyUnrolled && - (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount))) + (IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount))) L->setLoopAlreadyUnrolled(); return UnrollResult; @@ -1296,16 +1337,10 @@ Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced, PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &Updater) { - const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); - Function *F = L.getHeader()->getParent(); - - auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); - // FIXME: This should probably be optional rather than required. - if (!ORE) - report_fatal_error( - "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not " - "cached at a higher level"); + // For the new PM, we can't use OptimizationRemarkEmitter as an analysis + // pass. Function analyses need to be preserved across loop transformations + // but ORE cannot be preserved (see comment before the pass definition). + OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); // Keep track of the previous loop structure so we can identify new loops // created by unrolling. @@ -1316,9 +1351,9 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, else OldLoops.insert(AR.LI.begin(), AR.LI.end()); - std::string LoopName = L.getName(); + std::string LoopName = std::string(L.getName()); - bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, + bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, /*BFI*/ nullptr, /*PSI*/ nullptr, /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, ForgetSCEV, /*Count*/ None, @@ -1384,30 +1419,6 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, return getLoopPassPreservedAnalyses(); } -template <typename RangeT> -static SmallVector<Loop *, 8> appendLoopsToWorklist(RangeT &&Loops) { - SmallVector<Loop *, 8> Worklist; - // We use an internal worklist to build up the preorder traversal without - // recursion. - SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist; - - for (Loop *RootL : Loops) { - assert(PreOrderLoops.empty() && "Must start with an empty preorder walk."); - assert(PreOrderWorklist.empty() && - "Must start with an empty preorder walk worklist."); - PreOrderWorklist.push_back(RootL); - do { - Loop *L = PreOrderWorklist.pop_back_val(); - PreOrderWorklist.append(L->begin(), L->end()); - PreOrderLoops.push_back(L); - } while (!PreOrderWorklist.empty()); - - Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end()); - PreOrderLoops.clear(); - } - return Worklist; -} - PreservedAnalyses LoopUnrollPass::run(Function &F, FunctionAnalysisManager &AM) { auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); @@ -1421,10 +1432,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F)) LAM = &LAMProxy->getManager(); - const ModuleAnalysisManager &MAM = - AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); + auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); ProfileSummaryInfo *PSI = - MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); auto *BFI = (PSI && PSI->hasProfileSummary()) ? &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; @@ -1441,7 +1451,10 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, Changed |= formLCSSARecursively(*L, DT, &LI, &SE); } - SmallVector<Loop *, 8> Worklist = appendLoopsToWorklist(LI); + // Add the loop nests in the reverse order of LoopInfo. See method + // declaration. + SmallPriorityWorklist<Loop *, 4> Worklist; + appendLoopsToWorklist(LI, Worklist); while (!Worklist.empty()) { // Because the LoopInfo stores the loops in RPO, we walk the worklist @@ -1459,7 +1472,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling; if (PSI && PSI->hasHugeWorkingSetSize()) LocalAllowPeeling = false; - std::string LoopName = L.getName(); + std::string LoopName = std::string(L.getName()); // The API here is quite complex to call and we allow to select some // flavors of unrolling during construction time (by setting UnrollOpts). LoopUnrollResult Result = tryToUnrollLoop( |