summary | refs | log | tree | commit | diff
path: root/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 190
1 files changed, 99 insertions, 91 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 2ab0848193f6..3c484fb0d28a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -37,11 +37,15 @@ static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
-static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
- "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
- cl::desc("The maximum allowed number of runtime memory checks with a "
- "vectorize(enable) pragma."));
+namespace llvm {
+cl::opt<bool>
+ HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
+ cl::desc("Allow enabling loop hints to reorder "
+ "FP operations during vectorization."));
+}
+// TODO: Move size-based thresholds out of legality checking and make
+// cost-based decisions instead of using hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
"vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
cl::desc("The maximum number of SCEV checks allowed."));
@@ -51,6 +55,23 @@ static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
cl::desc("The maximum number of SCEV checks allowed with a "
"vectorize(enable) pragma"));
+// FIXME: When scalable vectorization is stable enough, change the default
+// to SK_PreferFixedWidth.
+static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization(
+ "scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
+ cl::Hidden,
+ cl::desc("Control whether the compiler can use scalable vectors to "
+ "vectorize a loop"),
+ cl::values(
+ clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
+ "Scalable vectorization is disabled."),
+ clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on",
+ "Scalable vectorization is available, but favor fixed-width "
+ "vectorization when the cost is inconclusive."),
+ clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
+ "Scalable vectorization is available and favored when the "
+ "cost is inconclusive.")));
+
/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;
@@ -60,7 +81,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
switch (Kind) {
case HK_WIDTH:
return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
- case HK_UNROLL:
+ case HK_INTERLEAVE:
return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
case HK_FORCE:
return (Val <= 1);
@@ -76,12 +97,12 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
bool InterleaveOnlyWhenForced,
OptimizationRemarkEmitter &ORE)
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
- Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
+ Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
- Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
- ORE(ORE) {
+ Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
+ TheLoop(L), ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@@ -89,13 +110,23 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
if (VectorizerParams::isInterleaveForced())
Interleave.Value = VectorizerParams::VectorizationInterleave;
+ if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
+ // If the width is set, but the metadata says nothing about the scalable
+ // property, then assume it concerns only a fixed-width UserVF.
+ // If width is not set, the flag takes precedence.
+ Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
+ else if (ScalableVectorization == SK_FixedWidthOnly)
+ // If the flag is set to disable any use of scalable vectors, override the
+ // loop hint.
+ Scalable.Value = SK_FixedWidthOnly;
+
if (IsVectorized.Value != 1)
// If the vectorization width and interleaving count are both 1 then
// consider the loop to have been already vectorized because there's
// nothing more that we can do.
IsVectorized.Value =
- getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1;
- LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
+ getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
+ LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
<< "LV: Interleaving disabled by the pass manager\n");
}
@@ -168,8 +199,8 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
R << " (Force=" << NV("Force", true);
if (Width.Value != 0)
R << ", Vector Width=" << NV("VectorWidth", getWidth());
- if (Interleave.Value != 0)
- R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
+ if (getInterleave() != 0)
+ R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
R << ")";
}
return R;
@@ -187,6 +218,15 @@ const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
return OptimizationRemarkAnalysis::AlwaysPrint;
}
+bool LoopVectorizeHints::allowReordering() const {
+ // Allow the vectorizer to change the order of operations if loop hints
+ // that enable vectorization are provided.
+ ElementCount EC = getWidth();
+ return HintsAllowReordering &&
+ (getForce() == LoopVectorizeHints::FK_Enabled ||
+ EC.getKnownMinValue() > 1);
+}
+
void LoopVectorizeHints::getHintsFromMetadata() {
MDNode *LoopID = TheLoop->getLoopID();
if (!LoopID)
@@ -246,42 +286,6 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
}
}
-bool LoopVectorizationRequirements::doesNotMeet(
- Function *F, Loop *L, const LoopVectorizeHints &Hints) {
- const char *PassName = Hints.vectorizeAnalysisPassName();
- bool Failed = false;
- if (UnsafeAlgebraInst && !Hints.allowReordering()) {
- ORE.emit([&]() {
- return OptimizationRemarkAnalysisFPCommute(
- PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
- UnsafeAlgebraInst->getParent())
- << "loop not vectorized: cannot prove it is safe to reorder "
- "floating-point operations";
- });
- Failed = true;
- }
-
- // Test if runtime memcheck thresholds are exceeded.
- bool PragmaThresholdReached =
- NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
- bool ThresholdReached =
- NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
- if ((ThresholdReached && !Hints.allowReordering()) ||
- PragmaThresholdReached) {
- ORE.emit([&]() {
- return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
- L->getStartLoc(),
- L->getHeader())
- << "loop not vectorized: cannot prove it is safe to reorder "
- "memory operations";
- });
- LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
- Failed = true;
- }
-
- return Failed;
-}
-
// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
@@ -415,7 +419,7 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
return false;
}
-int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
+int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
const ValueToValueMap &Strides =
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
@@ -582,7 +586,7 @@ bool LoopVectorizationLegality::setupOuterLoopInductions() {
/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
-/// multiple scalarcalls. This is represented in the
+/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
@@ -594,22 +598,24 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
// Check that all known VFs are not associated to a vector
// function, i.e. the vector name is emty.
- if (Scalarize)
- for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
- VF <= WidestVF; VF *= 2) {
+ if (Scalarize) {
+ ElementCount WidestFixedVF, WidestScalableVF;
+ TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
+ for (ElementCount VF = ElementCount::getFixed(2);
+ ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
- }
+ for (ElementCount VF = ElementCount::getScalable(1);
+ ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
+ Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
+ assert((WidestScalableVF.isZero() || !Scalarize) &&
+ "Caller may decide to scalarize a variant using a scalable VF");
+ }
return Scalarize;
}
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *Header = TheLoop->getHeader();
- // Look for the attribute signaling the absence of NaNs.
- Function &F = *Header->getParent();
- HasFunNoNaNAttr =
- F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
-
// For each block in the loop.
for (BasicBlock *BB : TheLoop->blocks()) {
// Scan the instructions in the block and look for hazards.
@@ -649,8 +655,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
RecurrenceDescriptor RedDes;
if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
DT)) {
- if (RedDes.hasUnsafeAlgebra())
- Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
+ Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
AllowedExit.insert(RedDes.getLoopExitInstr());
Reductions[Phi] = RedDes;
continue;
@@ -673,8 +678,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
InductionDescriptor ID;
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
addInductionPhi(Phi, ID, AllowedExit);
- if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
- Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
+ Requirements->addExactFPMathInst(ID.getExactFPMathInst());
continue;
}
@@ -881,6 +885,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
"loop not vectorized: ", *LAR);
});
}
+
if (!LAI->canVectorizeMemory())
return false;
@@ -890,12 +895,38 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
"CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
return false;
}
+
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
PSE.addPredicate(LAI->getPSE().getUnionPredicate());
-
return true;
}
+bool LoopVectorizationLegality::canVectorizeFPMath(
+ bool EnableStrictReductions) {
+
+ // First check if there is any ExactFP math or if we allow reassociations.
+ if (!Requirements->getExactFPInst() || Hints->allowReordering())
+ return true;
+
+ // If the above is false, we have ExactFPMath & do not allow reordering.
+ // If the EnableStrictReductions flag is set, first check if we have any
+ // Exact FP induction vars, which we cannot vectorize.
+ if (!EnableStrictReductions ||
+ any_of(getInductionVars(), [&](auto &Induction) -> bool {
+ InductionDescriptor IndDesc = Induction.second;
+ return IndDesc.getExactFPMathInst();
+ }))
+ return false;
+
+ // We can now only vectorize if all reductions with Exact FP math also
+ // have the isOrdered flag set, which indicates that we can move the
+ // reduction operations in-loop.
+ return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
+ const RecurrenceDescriptor &RdxDesc = Reduction.second;
+ return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
+ }));
+}
+
bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
Value *In0 = const_cast<Value *>(V);
PHINode *PN = dyn_cast_or_null<PHINode>(In0);
@@ -918,17 +949,14 @@ bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
return FirstOrderRecurrences.count(Phi);
}
-bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
+bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
bool LoopVectorizationLegality::blockCanBePredicated(
BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
SmallPtrSetImpl<const Instruction *> &MaskedOp,
- SmallPtrSetImpl<Instruction *> &ConditionalAssumes,
- bool PreserveGuards) const {
- const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
-
+ SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
for (Instruction &I : *BB) {
// Check that we don't have a constant expression that can trap as operand.
for (Value *Operand : I.operands()) {
@@ -956,11 +984,7 @@ bool LoopVectorizationLegality::blockCanBePredicated(
if (!LI)
return false;
if (!SafePtrs.count(LI->getPointerOperand())) {
- // !llvm.mem.parallel_loop_access implies if-conversion safety.
- // Otherwise, record that the load needs (real or emulated) masking
- // and let the cost model decide.
- if (!IsAnnotatedParallel || PreserveGuards)
- MaskedOp.insert(LI);
+ MaskedOp.insert(LI);
continue;
}
}
@@ -1101,21 +1125,6 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
return false;
}
- // We currently must have a single "exit block" after the loop. Note that
- // multiple "exiting blocks" inside the loop are allowed, provided they all
- // reach the single exit block.
- // TODO: This restriction can be relaxed in the near future, it's here solely
- // to allow separation of changes for review. We need to generalize the phi
- // update logic in a number of places.
- if (!Lp->getUniqueExitBlock()) {
- reportVectorizationFailure("The loop must have a unique exit block",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
return Result;
}
@@ -1276,8 +1285,7 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
// do not need predication such as the header block.
for (BasicBlock *BB : TheLoop->blocks()) {
if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
- TmpConditionalAssumes,
- /* MaskAllLoads= */ true)) {
+ TmpConditionalAssumes)) {
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
return false;
}