src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2018-07-28 10:51:19 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2018-07-28 10:51:19 +0000
commit	eb11fae6d08f479c0799db45860a98af528fa6e7 (patch)
tree	44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Transforms/Scalar/LoopUnrollPass.cpp
parent	b8a2042aa938069e862750553db0e4d82d25822c (diff)
download	src-eb11fae6d08f479c0799db45860a98af528fa6e7.tar.gz src-eb11fae6d08f479c0799db45860a98af528fa6e7.zip

vendor/llvm/llvm-trunk-r338150

Notes

Diffstat (limited to 'lib/Transforms/Scalar/LoopUnrollPass.cpp')

-rw-r--r-- lib/Transforms/Scalar/LoopUnrollPass.cpp | 149

1 files changed, 83 insertions, 66 deletions

diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 15e7da5e1a7a..634215c9770f 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp

@@ -53,6 +53,7 @@

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Scalar.h"

#include "llvm/Transforms/Scalar/LoopPassManager.h"

+#include "llvm/Transforms/Utils.h"

#include "llvm/Transforms/Utils/LoopSimplify.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include "llvm/Transforms/Utils/UnrollLoop.h"

@@ -164,7 +165,7 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();

/// Gather the various unrolling parameters based on the defaults, compiler

/// flags, TTI overrides and user specified parameters.

-static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(

+TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(

Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,

Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,

Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,

@@ -191,6 +192,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(

UP.Force = false;

UP.UpperBound = false;

UP.AllowPeeling = true;

+ UP.UnrollAndJam = false;

+ UP.UnrollAndJamInnerLoopThreshold = 60;

// Override with any target specific settings

TTI.getUnrollingPreferences(L, SE, UP);

@@ -285,17 +288,17 @@ struct UnrolledInstStateKeyInfo {

};

struct EstimatedUnrollCost {

- /// \brief The estimated cost after unrolling.

+ /// The estimated cost after unrolling.

unsigned UnrolledCost;

- /// \brief The estimated dynamic cost of executing the instructions in the

+ /// The estimated dynamic cost of executing the instructions in the

/// rolled form.

unsigned RolledDynamicCost;

};

} // end anonymous namespace

-/// \brief Figure out if the loop is worth full unrolling.

+/// Figure out if the loop is worth full unrolling.

///

/// Complete loop unrolling can make some loads constant, and we need to know

/// if that would expose any further optimization opportunities. This routine

@@ -308,10 +311,10 @@ struct EstimatedUnrollCost {

/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If

/// the analysis failed (no benefits expected from the unrolling, or the loop is

/// too big to analyze), the returned value is None.

-static Optional<EstimatedUnrollCost>

-analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,

- ScalarEvolution &SE, const TargetTransformInfo &TTI,

- unsigned MaxUnrolledLoopSize) {

+static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(

+ const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,

+ const SmallPtrSetImpl<const Value *> &EphValues,

+ const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {

// We want to be able to scale offsets by the trip count and add more offsets

// to them without checking for overflows, and we already don't want to

// analyze *massive* trip counts, so we force the max to be reasonably small.

@@ -405,9 +408,9 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,

// First accumulate the cost of this instruction.

if (!Cost.IsFree) {

UnrolledCost += TTI.getUserCost(I);

- DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration

- << "): ");

- DEBUG(I->dump());

+ LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "

+ << Iteration << "): ");

+ LLVM_DEBUG(I->dump());

}

// We must count the cost of every operand which is not free,

@@ -442,14 +445,14 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,

assert(L->isLCSSAForm(DT) &&

"Must have loops in LCSSA form to track live-out values.");

- DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

+ LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

// Simulate execution of each iteration of the loop counting instructions,

// which would be simplified.

// Since the same load will take different values on different iterations,

// we literally have to go through all loop's iterations.

for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {

- DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

+ LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

// Prepare for the iteration by collecting any simplified entry or backedge

// inputs.

@@ -490,7 +493,9 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,

// it. We don't change the actual IR, just count optimization

// opportunities.

for (Instruction &I : *BB) {

- if (isa<DbgInfoIntrinsic>(I))

+ // These won't get into the final code - don't even try calculating the

+ // cost for them.

+ if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))

continue;

// Track this instruction's expected baseline cost when executing the

@@ -512,8 +517,13 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,

// Can't properly model a cost of a call.

// FIXME: With a proper cost model we should be able to do it.

- if(isa<CallInst>(&I))

- return None;

+ if (auto *CI = dyn_cast<CallInst>(&I)) {

+ const Function *Callee = CI->getCalledFunction();

+ if (!Callee || TTI.isLoweredToCall(Callee)) {

+ LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");

+ return None;

+ }

+ }

// If the instruction might have a side-effect recursively account for

// the cost of it and all the instructions leading up to it.

@@ -522,10 +532,10 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,

// If unrolled body turns out to be too big, bail out.

if (UnrolledCost > MaxUnrolledLoopSize) {

- DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"

- << " UnrolledCost: " << UnrolledCost

- << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize

- << "\n");

+ LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"

+ << " UnrolledCost: " << UnrolledCost

+ << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize

+ << "\n");

return None;

}

@@ -578,8 +588,8 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,

// If we found no optimization opportunities on the first iteration, we

// won't find them on later ones too.

if (UnrolledCost == RolledDynamicCost) {

- DEBUG(dbgs() << " No opportunities found.. exiting.\n"

- << " UnrolledCost: " << UnrolledCost << "\n");

+ LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"

+ << " UnrolledCost: " << UnrolledCost << "\n");

return None;

}

@@ -600,20 +610,17 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,

}

- DEBUG(dbgs() << "Analysis finished:\n"

- << "UnrolledCost: " << UnrolledCost << ", "

- << "RolledDynamicCost: " << RolledDynamicCost << "\n");

+ LLVM_DEBUG(dbgs() << "Analysis finished:\n"

+ << "UnrolledCost: " << UnrolledCost << ", "

+ << "RolledDynamicCost: " << RolledDynamicCost << "\n");

return {{UnrolledCost, RolledDynamicCost}};

}

/// ApproximateLoopSize - Approximate the size of the loop.

-static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,

- bool &NotDuplicatable, bool &Convergent,

- const TargetTransformInfo &TTI,

- AssumptionCache *AC, unsigned BEInsns) {

- SmallPtrSet<const Value *, 32> EphValues;

- CodeMetrics::collectEphemeralValues(L, AC, EphValues);

+unsigned llvm::ApproximateLoopSize(

+ const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,

+ const TargetTransformInfo &TTI,

+ const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {

CodeMetrics Metrics;

for (BasicBlock *BB : L->blocks())

Metrics.analyzeBasicBlock(BB, TTI, EphValues);

@@ -706,10 +713,11 @@ static uint64_t getUnrolledLoopSize(

// Returns true if unroll count was set explicitly.

// Calculates unroll count and writes it to UP.Count.

-static bool computeUnrollCount(

+bool llvm::computeUnrollCount(

Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,

- ScalarEvolution &SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,

- unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,

+ ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,

+ OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,

+ unsigned &TripMultiple, unsigned LoopSize,

TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {

// Check for explicit Count.

// 1st priority is unroll count set by "unroll-count" option.

@@ -729,7 +737,7 @@ static bool computeUnrollCount(

UP.Runtime = true;

UP.AllowExpensiveTripCount = true;

UP.Force = true;

- if (UP.AllowRemainder &&

+ if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&

getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)

return true;

}

@@ -746,8 +754,8 @@ static bool computeUnrollCount(

if (ExplicitUnroll && TripCount != 0) {

// If the loop has an unrolling pragma, we want to be more aggressive with

- // unrolling limits. Set thresholds to at least the PragmaThreshold value

- // which is larger than the default limits.

+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold

+ // value which is larger than the default limits.

UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);

UP.PartialThreshold =

std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);

@@ -763,7 +771,7 @@ static bool computeUnrollCount(

// compute the former when the latter is zero.

unsigned ExactTripCount = TripCount;

assert((ExactTripCount == 0 || MaxTripCount == 0) &&

- "ExtractTripCound and MaxTripCount cannot both be non zero.");

+ "ExtractTripCount and MaxTripCount cannot both be non zero.");

unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;

UP.Count = FullUnrollTripCount;

if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {

@@ -779,7 +787,7 @@ static bool computeUnrollCount(

// helps to remove a significant number of instructions.

// To check that, run additional analysis on the loop.

if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(

- L, FullUnrollTripCount, DT, SE, TTI,

+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,

UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {

unsigned Boost =

getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);

@@ -794,7 +802,7 @@ static bool computeUnrollCount(

}

// 4th priority is loop peeling

- computePeelCount(L, LoopSize, UP, TripCount);

+ computePeelCount(L, LoopSize, UP, TripCount, SE);

if (UP.PeelCount) {

UP.Runtime = false;

UP.Count = 1;

@@ -802,12 +810,12 @@ static bool computeUnrollCount(

}

// 5th priority is partial unrolling.

- // Try partial unroll only when TripCount could be staticaly calculated.

+ // Try partial unroll only when TripCount could be statically calculated.

if (TripCount) {

UP.Partial |= ExplicitUnroll;

if (!UP.Partial) {

- DEBUG(dbgs() << " will not try to unroll partially because "

- << "-unroll-allow-partial not given\n");

+ LLVM_DEBUG(dbgs() << " will not try to unroll partially because "

+ << "-unroll-allow-partial not given\n");

UP.Count = 0;

return false;

}

@@ -894,8 +902,9 @@ static bool computeUnrollCount(

// Reduce count based on the type of unrolling and the threshold values.

UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;

if (!UP.Runtime) {

- DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "

- << "-unroll-runtime not given\n");

+ LLVM_DEBUG(

+ dbgs() << " will not try to unroll loop with runtime trip count "

+ << "-unroll-runtime not given\n");

UP.Count = 0;

return false;

}

@@ -915,12 +924,13 @@ static bool computeUnrollCount(

if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {

while (UP.Count != 0 && TripMultiple % UP.Count != 0)

UP.Count >>= 1;

- DEBUG(dbgs() << "Remainder loop is restricted (that could architecture "

- "specific or because the loop contains a convergent "

- "instruction), so unroll count must divide the trip "

- "multiple, "

- << TripMultiple << ". Reducing unroll count from "

- << OrigCount << " to " << UP.Count << ".\n");

+ LLVM_DEBUG(

+ dbgs() << "Remainder loop is restricted (that could architecture "

+ "specific or because the loop contains a convergent "

+ "instruction), so unroll count must divide the trip "

+ "multiple, "

+ << TripMultiple << ". Reducing unroll count from " << OrigCount

+ << " to " << UP.Count << ".\n");

using namespace ore;

@@ -942,7 +952,8 @@ static bool computeUnrollCount(

if (UP.Count > UP.MaxCount)

UP.Count = UP.MaxCount;

- DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n");

+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count

+ << "\n");

if (UP.Count < 2)

UP.Count = 0;

return ExplicitUnroll;

@@ -955,12 +966,13 @@ static LoopUnrollResult tryToUnrollLoop(

Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold,

Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime,

Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) {

- DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()

- << "] Loop %" << L->getHeader()->getName() << "\n");

+ LLVM_DEBUG(dbgs() << "Loop Unroll: F["

+ << L->getHeader()->getParent()->getName() << "] Loop %"

+ << L->getHeader()->getName() << "\n");

if (HasUnrollDisablePragma(L))

return LoopUnrollResult::Unmodified;

if (!L->isLoopSimplifyForm()) {

- DEBUG(

+ LLVM_DEBUG(

dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");

return LoopUnrollResult::Unmodified;

}

@@ -975,16 +987,21 @@ static LoopUnrollResult tryToUnrollLoop(

// Exit early if unrolling is disabled.

if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))

return LoopUnrollResult::Unmodified;

- unsigned LoopSize = ApproximateLoopSize(

- L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns);

- DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");

+ SmallPtrSet<const Value *, 32> EphValues;

+ CodeMetrics::collectEphemeralValues(L, &AC, EphValues);

+ unsigned LoopSize =

+ ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,

+ TTI, EphValues, UP.BEInsns);

+ LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");

if (NotDuplicatable) {

- DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"

- << " instructions.\n");

+ LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"

+ << " instructions.\n");

return LoopUnrollResult::Unmodified;

}

if (NumInlineCandidates != 0) {

- DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");

+ LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");

return LoopUnrollResult::Unmodified;

}

@@ -1030,7 +1047,7 @@ static LoopUnrollResult tryToUnrollLoop(

// loop tests remains the same compared to the non-unrolled version, whereas

// the generic upper bound unrolling keeps all but the last loop test so the

// number of loop tests goes up which may end up being worse on targets with

- // constriained branch predictor resources so is controlled by an option.)

+ // constrained branch predictor resources so is controlled by an option.)

// In addition we only unroll small upper bounds.

if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {

MaxTripCount = 0;

@@ -1040,9 +1057,9 @@ static LoopUnrollResult tryToUnrollLoop(

// computeUnrollCount() decides whether it is beneficial to use upper bound to

// fully unroll the loop.

bool UseUpperBound = false;

- bool IsCountSetExplicitly =

- computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount,

- TripMultiple, LoopSize, UP, UseUpperBound);

+ bool IsCountSetExplicitly = computeUnrollCount(

+ L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,

+ TripMultiple, LoopSize, UP, UseUpperBound);

if (!UP.Count)

return LoopUnrollResult::Unmodified;

// Unroll factor (Count) must be less or equal to TripCount.