From eb11fae6d08f479c0799db45860a98af528fa6e7 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 28 Jul 2018 10:51:19 +0000 Subject: Vendor import of llvm trunk r338150: https://llvm.org/svn/llvm-project/llvm/trunk@338150 --- lib/Transforms/Scalar/LoopUnrollPass.cpp | 149 +++++++++++++++++-------------- 1 file changed, 83 insertions(+), 66 deletions(-) (limited to 'lib/Transforms/Scalar/LoopUnrollPass.cpp') diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 15e7da5e1a7a..634215c9770f 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -53,6 +53,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" @@ -164,7 +165,7 @@ static const unsigned NoThreshold = std::numeric_limits::max(); /// Gather the various unrolling parameters based on the defaults, compiler /// flags, TTI overrides and user specified parameters. -static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( +TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, @@ -191,6 +192,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( UP.Force = false; UP.UpperBound = false; UP.AllowPeeling = true; + UP.UnrollAndJam = false; + UP.UnrollAndJamInnerLoopThreshold = 60; // Override with any target specific settings TTI.getUnrollingPreferences(L, SE, UP); @@ -285,17 +288,17 @@ struct UnrolledInstStateKeyInfo { }; struct EstimatedUnrollCost { - /// \brief The estimated cost after unrolling. + /// The estimated cost after unrolling. unsigned UnrolledCost; - /// \brief The estimated dynamic cost of executing the instructions in the + /// The estimated dynamic cost of executing the instructions in the /// rolled form. unsigned RolledDynamicCost; }; } // end anonymous namespace -/// \brief Figure out if the loop is worth full unrolling. +/// Figure out if the loop is worth full unrolling. /// /// Complete loop unrolling can make some loads constant, and we need to know /// if that would expose any further optimization opportunities. This routine @@ -308,10 +311,10 @@ struct EstimatedUnrollCost { /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If /// the analysis failed (no benefits expected from the unrolling, or the loop is /// too big to analyze), the returned value is None. -static Optional -analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, - ScalarEvolution &SE, const TargetTransformInfo &TTI, - unsigned MaxUnrolledLoopSize) { +static Optional analyzeLoopUnrollCost( + const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, + const SmallPtrSetImpl &EphValues, + const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) { // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. @@ -405,9 +408,9 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, // First accumulate the cost of this instruction. if (!Cost.IsFree) { UnrolledCost += TTI.getUserCost(I); - DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration - << "): "); - DEBUG(I->dump()); + LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration " + << Iteration << "): "); + LLVM_DEBUG(I->dump()); } // We must count the cost of every operand which is not free, @@ -442,14 +445,14 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, assert(L->isLCSSAForm(DT) && "Must have loops in LCSSA form to track live-out values."); - DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); + LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. // Since the same load will take different values on different iterations, // we literally have to go through all loop's iterations. for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { - DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); + LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); // Prepare for the iteration by collecting any simplified entry or backedge // inputs. @@ -490,7 +493,9 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, // it. We don't change the actual IR, just count optimization // opportunities. for (Instruction &I : *BB) { - if (isa(I)) + // These won't get into the final code - don't even try calculating the + // cost for them. + if (isa(I) || EphValues.count(&I)) continue; // Track this instruction's expected baseline cost when executing the @@ -512,8 +517,13 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, // Can't properly model a cost of a call. // FIXME: With a proper cost model we should be able to do it. - if(isa(&I)) - return None; + if (auto *CI = dyn_cast(&I)) { + const Function *Callee = CI->getCalledFunction(); + if (!Callee || TTI.isLoweredToCall(Callee)) { + LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n"); + return None; + } + } // If the instruction might have a side-effect recursively account for // the cost of it and all the instructions leading up to it. @@ -522,10 +532,10 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, // If unrolled body turns out to be too big, bail out. if (UnrolledCost > MaxUnrolledLoopSize) { - DEBUG(dbgs() << " Exceeded threshold.. exiting.\n" - << " UnrolledCost: " << UnrolledCost - << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize - << "\n"); + LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n" + << " UnrolledCost: " << UnrolledCost + << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize + << "\n"); return None; } } @@ -578,8 +588,8 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, // If we found no optimization opportunities on the first iteration, we // won't find them on later ones too. if (UnrolledCost == RolledDynamicCost) { - DEBUG(dbgs() << " No opportunities found.. exiting.\n" - << " UnrolledCost: " << UnrolledCost << "\n"); + LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n" + << " UnrolledCost: " << UnrolledCost << "\n"); return None; } } @@ -600,20 +610,17 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, } } - DEBUG(dbgs() << "Analysis finished:\n" - << "UnrolledCost: " << UnrolledCost << ", " - << "RolledDynamicCost: " << RolledDynamicCost << "\n"); + LLVM_DEBUG(dbgs() << "Analysis finished:\n" + << "UnrolledCost: " << UnrolledCost << ", " + << "RolledDynamicCost: " << RolledDynamicCost << "\n"); return {{UnrolledCost, RolledDynamicCost}}; } /// ApproximateLoopSize - Approximate the size of the loop. -static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - bool &NotDuplicatable, bool &Convergent, - const TargetTransformInfo &TTI, - AssumptionCache *AC, unsigned BEInsns) { - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(L, AC, EphValues); - +unsigned llvm::ApproximateLoopSize( + const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, + const TargetTransformInfo &TTI, + const SmallPtrSetImpl &EphValues, unsigned BEInsns) { CodeMetrics Metrics; for (BasicBlock *BB : L->blocks()) Metrics.analyzeBasicBlock(BB, TTI, EphValues); @@ -706,10 +713,11 @@ static uint64_t getUnrolledLoopSize( // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. -static bool computeUnrollCount( +bool llvm::computeUnrollCount( Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution &SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount, - unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, + ScalarEvolution &SE, const SmallPtrSetImpl &EphValues, + OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, + unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. @@ -729,7 +737,7 @@ static bool computeUnrollCount( UP.Runtime = true; UP.AllowExpensiveTripCount = true; UP.Force = true; - if (UP.AllowRemainder && + if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) && getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold) return true; } @@ -746,8 +754,8 @@ static bool computeUnrollCount( if (ExplicitUnroll && TripCount != 0) { // If the loop has an unrolling pragma, we want to be more aggressive with - // unrolling limits. Set thresholds to at least the PragmaThreshold value - // which is larger than the default limits. + // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold + // value which is larger than the default limits. UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); UP.PartialThreshold = std::max(UP.PartialThreshold, PragmaUnrollThreshold); @@ -763,7 +771,7 @@ static bool computeUnrollCount( // compute the former when the latter is zero. unsigned ExactTripCount = TripCount; assert((ExactTripCount == 0 || MaxTripCount == 0) && - "ExtractTripCound and MaxTripCount cannot both be non zero."); + "ExtractTripCount and MaxTripCount cannot both be non zero."); unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount; UP.Count = FullUnrollTripCount; if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { @@ -779,7 +787,7 @@ static bool computeUnrollCount( // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. if (Optional Cost = analyzeLoopUnrollCost( - L, FullUnrollTripCount, DT, SE, TTI, + L, FullUnrollTripCount, DT, SE, EphValues, TTI, UP.Threshold * UP.MaxPercentThresholdBoost / 100)) { unsigned Boost = getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); @@ -794,7 +802,7 @@ static bool computeUnrollCount( } // 4th priority is loop peeling - computePeelCount(L, LoopSize, UP, TripCount); + computePeelCount(L, LoopSize, UP, TripCount, SE); if (UP.PeelCount) { UP.Runtime = false; UP.Count = 1; @@ -802,12 +810,12 @@ static bool computeUnrollCount( } // 5th priority is partial unrolling. - // Try partial unroll only when TripCount could be staticaly calculated. + // Try partial unroll only when TripCount could be statically calculated. if (TripCount) { UP.Partial |= ExplicitUnroll; if (!UP.Partial) { - DEBUG(dbgs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); + LLVM_DEBUG(dbgs() << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"); UP.Count = 0; return false; } @@ -894,8 +902,9 @@ static bool computeUnrollCount( // Reduce count based on the type of unrolling and the threshold values. UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; if (!UP.Runtime) { - DEBUG(dbgs() << " will not try to unroll loop with runtime trip count " - << "-unroll-runtime not given\n"); + LLVM_DEBUG( + dbgs() << " will not try to unroll loop with runtime trip count " + << "-unroll-runtime not given\n"); UP.Count = 0; return false; } @@ -915,12 +924,13 @@ static bool computeUnrollCount( if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) { while (UP.Count != 0 && TripMultiple % UP.Count != 0) UP.Count >>= 1; - DEBUG(dbgs() << "Remainder loop is restricted (that could architecture " - "specific or because the loop contains a convergent " - "instruction), so unroll count must divide the trip " - "multiple, " - << TripMultiple << ". Reducing unroll count from " - << OrigCount << " to " << UP.Count << ".\n"); + LLVM_DEBUG( + dbgs() << "Remainder loop is restricted (that could architecture " + "specific or because the loop contains a convergent " + "instruction), so unroll count must divide the trip " + "multiple, " + << TripMultiple << ". Reducing unroll count from " << OrigCount + << " to " << UP.Count << ".\n"); using namespace ore; @@ -942,7 +952,8 @@ static bool computeUnrollCount( if (UP.Count > UP.MaxCount) UP.Count = UP.MaxCount; - DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n"); + LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count + << "\n"); if (UP.Count < 2) UP.Count = 0; return ExplicitUnroll; @@ -955,12 +966,13 @@ static LoopUnrollResult tryToUnrollLoop( Optional ProvidedCount, Optional ProvidedThreshold, Optional ProvidedAllowPartial, Optional ProvidedRuntime, Optional ProvidedUpperBound, Optional ProvidedAllowPeeling) { - DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() - << "] Loop %" << L->getHeader()->getName() << "\n"); + LLVM_DEBUG(dbgs() << "Loop Unroll: F[" + << L->getHeader()->getParent()->getName() << "] Loop %" + << L->getHeader()->getName() << "\n"); if (HasUnrollDisablePragma(L)) return LoopUnrollResult::Unmodified; if (!L->isLoopSimplifyForm()) { - DEBUG( + LLVM_DEBUG( dbgs() << " Not unrolling loop which is not in loop-simplify form.\n"); return LoopUnrollResult::Unmodified; } @@ -975,16 +987,21 @@ static LoopUnrollResult tryToUnrollLoop( // Exit early if unrolling is disabled. if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0)) return LoopUnrollResult::Unmodified; - unsigned LoopSize = ApproximateLoopSize( - L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns); - DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(L, &AC, EphValues); + + unsigned LoopSize = + ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent, + TTI, EphValues, UP.BEInsns); + LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (NotDuplicatable) { - DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" - << " instructions.\n"); + LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" + << " instructions.\n"); return LoopUnrollResult::Unmodified; } if (NumInlineCandidates != 0) { - DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); + LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); return LoopUnrollResult::Unmodified; } @@ -1030,7 +1047,7 @@ static LoopUnrollResult tryToUnrollLoop( // loop tests remains the same compared to the non-unrolled version, whereas // the generic upper bound unrolling keeps all but the last loop test so the // number of loop tests goes up which may end up being worse on targets with - // constriained branch predictor resources so is controlled by an option.) + // constrained branch predictor resources so is controlled by an option.) // In addition we only unroll small upper bounds. if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) { MaxTripCount = 0; @@ -1040,9 +1057,9 @@ static LoopUnrollResult tryToUnrollLoop( // computeUnrollCount() decides whether it is beneficial to use upper bound to // fully unroll the loop. bool UseUpperBound = false; - bool IsCountSetExplicitly = - computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount, - TripMultiple, LoopSize, UP, UseUpperBound); + bool IsCountSetExplicitly = computeUnrollCount( + L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, + TripMultiple, LoopSize, UP, UseUpperBound); if (!UP.Count) return LoopUnrollResult::Unmodified; // Unroll factor (Count) must be less or equal to TripCount. -- cgit v1.2.3