summaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-01-19 10:01:25 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-01-19 10:01:25 +0000
commitd8e91e46262bc44006913e6796843909f1ac7bcd (patch)
tree7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
parentb7eb8e35e481a74962664b63dfb09483b200209a (diff)
Notes
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorizationLegality.cpp')
-rw-r--r--lib/Transforms/Vectorize/LoopVectorizationLegality.cpp186
1 files changed, 163 insertions, 23 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 697bc1b448d7..b44fe5a52a2f 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -80,10 +80,11 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
return false;
}
-LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
+LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
+ bool InterleaveOnlyWhenForced,
OptimizationRemarkEmitter &ORE)
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
- Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
+ Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
// Populate values with existing loop metadata.
@@ -98,19 +99,19 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
// consider the loop to have been already vectorized because there's
// nothing more that we can do.
IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
- LLVM_DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
+ LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
<< "LV: Interleaving disabled by the pass manager\n");
}
-bool LoopVectorizeHints::allowVectorization(Function *F, Loop *L,
- bool AlwaysVectorize) const {
+bool LoopVectorizeHints::allowVectorization(
+ Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
if (getForce() == LoopVectorizeHints::FK_Disabled) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
emitRemarkWithHints();
return false;
}
- if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
+ if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
emitRemarkWithHints();
return false;
@@ -434,7 +435,7 @@ static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
SmallPtrSetImpl<Value *> &AllowedExit) {
- // Reduction and Induction instructions are allowed to have exit users. All
+ // Reductions, Inductions and non-header phis are allowed to have exit users. All
// other instructions must not have external users.
if (!AllowedExit.count(Inst))
// Check that all of the users of the loop are inside the BB.
@@ -516,6 +517,18 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
return false;
}
+ // Check whether we are able to set up outer loop induction.
+ if (!setupOuterLoopInductions()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Not vectorizing: Unsupported outer loop Phi(s).\n");
+ ORE->emit(createMissedAnalysis("UnsupportedPhi")
+ << "Unsupported outer loop Phi(s)");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
return Result;
}
@@ -561,7 +574,8 @@ void LoopVectorizationLegality::addInductionPhi(
// back into the PHI node may have external users.
// We can allow those uses, except if the SCEVs we have for them rely
// on predicates that only hold within the loop, since allowing the exit
- // currently means re-using this SCEV outside the loop.
+ // currently means re-using this SCEV outside the loop (see PR33706 for more
+ // details).
if (PSE.getUnionPredicate().isAlwaysTrue()) {
AllowedExit.insert(Phi);
AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
@@ -570,6 +584,32 @@ void LoopVectorizationLegality::addInductionPhi(
LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}
+/// Try to register every PHI in the outer loop header as an induction.
+///
+/// A header PHI is accepted only when InductionDescriptor::isInductionPHI
+/// recognizes it and its kind is IK_IntInduction; accepted PHIs are handed to
+/// addInductionPhi together with AllowedExit.
+///
+/// \returns true if all header PHIs were accepted, false otherwise.
+bool LoopVectorizationLegality::setupOuterLoopInductions() {
+  BasicBlock *Header = TheLoop->getHeader();
+
+  // Returns true if a given Phi is a supported induction. As a side effect,
+  // a supported Phi is recorded through addInductionPhi.
+  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
+    InductionDescriptor ID;
+    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
+        ID.getKind() == InductionDescriptor::IK_IntInduction) {
+      addInductionPhi(&Phi, ID, AllowedExit);
+      return true;
+    }
+
+    // Bail out for any Phi in the outer loop header that is not a supported
+    // induction.
+    LLVM_DEBUG(
+        dbgs()
+        << "LV: Found unsupported PHI for outer loop vectorization.\n");
+    return false;
+  };
+
+  return llvm::all_of(Header->phis(), isSupportedPhi);
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *Header = TheLoop->getHeader();
@@ -597,14 +637,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// can convert it to select during if-conversion. No need to check if
// the PHIs in this block are induction or reduction variables.
if (BB != Header) {
- // Check that this instruction has no outside users or is an
- // identified reduction value with an outside user.
- if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit))
- continue;
- ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi)
- << "value could not be identified as "
- "an induction or reduction variable");
- return false;
+ // Non-header phi nodes that have outside uses can be vectorized. Add
+ // them to the list of allowed exits.
+ // Unsafe cyclic dependencies with header phis are identified during
+ // legalization for reduction, induction and first order
+ // recurrences.
+ continue;
}
// We only allow if-converted PHIs with exactly two incoming values.
@@ -625,6 +663,20 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
+ // TODO: Instead of recording the AllowedExit, it would be good to record the
+ // complementary set: NotAllowedExit. These include (but may not be
+ // limited to):
+ // 1. Reduction phis as they represent the one-before-last value, which
+ // is not available when vectorized
+ // 2. Induction phis and increment when SCEV predicates cannot be used
+ // outside the loop - see addInductionPhi
+ // 3. Non-Phis with outside uses when SCEV predicates cannot be used
+ // outside the loop - see call to hasOutsideLoopUser in the non-phi
+ // handling below
+ // 4. FirstOrderRecurrence phis that can possibly be handled by
+ // extraction.
+ // By recording these, we can then reason about ways to vectorize each
+ // of these NotAllowedExit.
InductionDescriptor ID;
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
addInductionPhi(Phi, ID, AllowedExit);
@@ -662,10 +714,30 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
!isa<DbgInfoIntrinsic>(CI) &&
!(CI->getCalledFunction() && TLI &&
TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
- ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
- << "call instruction cannot be vectorized");
+ // If the call is a recognized math library call, it is likely that
+ // we can vectorize it given loosened floating-point constraints.
+ LibFunc Func;
+ bool IsMathLibCall =
+ TLI && CI->getCalledFunction() &&
+ CI->getType()->isFloatingPointTy() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->hasOptimizedCodeGen(Func);
+
+ if (IsMathLibCall) {
+ // TODO: Ideally, we should not use clang-specific language here,
+ // but it's hard to provide meaningful yet generic advice.
+ // Also, should this be guarded by allowExtraAnalysis() and/or be part
+ // of the returned info from isFunctionVectorizable()?
+ ORE->emit(createMissedAnalysis("CantVectorizeLibcall", CI)
+ << "library call cannot be vectorized. "
+ "Try compiling with -fno-math-errno, -ffast-math, "
+ "or similar flags");
+ } else {
+ ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
+ << "call instruction cannot be vectorized");
+ }
LLVM_DEBUG(
- dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
+ dbgs() << "LV: Found a non-intrinsic callsite.\n");
return false;
}
@@ -717,6 +789,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
+ // We can safely vectorize loops where instructions within the loop are
+ // used outside the loop only if the SCEV predicates within the loop are
+ // the same as outside the loop. Allowing the exit means reusing the SCEV
+ // outside the loop.
+ if (PSE.getUnionPredicate().isAlwaysTrue()) {
+ AllowedExit.insert(&I);
+ continue;
+ }
ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
<< "value cannot be used outside the loop");
return false;
@@ -730,6 +810,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
ORE->emit(createMissedAnalysis("NoInductionVariable")
<< "loop induction variable could not be identified");
return false;
+ } else if (!WidestIndTy) {
+ ORE->emit(createMissedAnalysis("NoIntegerInductionVariable")
+ << "integer loop induction variable could not be identified");
+ return false;
}
}
@@ -754,13 +838,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (!LAI->canVectorizeMemory())
return false;
- if (LAI->hasStoreToLoopInvariantAddress()) {
+ if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
- << "write to a loop invariant address could not be vectorized");
- LLVM_DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+ << "write to a loop invariant address could not "
+ "be vectorized");
+ LLVM_DEBUG(
+ dbgs() << "LV: Non vectorizable stores to a uniform address\n");
return false;
}
-
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
PSE.addPredicate(LAI->getPSE().getUnionPredicate());
@@ -1069,4 +1154,59 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return Result;
}
+/// Check whether the loop tail can be folded into the loop by masking.
+///
+/// Folding is rejected, with a missed-analysis remark and a debug message, if
+/// the loop has no primary induction, has any reductions, has an allowed-exit
+/// value with a user outside the loop, or contains a block that
+/// blockCanBePredicated refuses.
+///
+/// \returns true if none of the above conditions applies.
+bool LoopVectorizationLegality::canFoldTailByMasking() {
+  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
+
+  // Without a primary induction variable the tail cannot be folded.
+  if (!PrimaryInduction) {
+    ORE->emit(createMissedAnalysis("NoPrimaryInduction")
+              << "Missing a primary induction variable in the loop, which is "
+              << "needed in order to fold tail by masking as required.");
+    LLVM_DEBUG(dbgs() << "LV: No primary induction, cannot fold tail by "
+                      << "masking.\n");
+    return false;
+  }
+
+  // TODO: handle reductions when tail is folded by masking.
+  if (!Reductions.empty()) {
+    ORE->emit(createMissedAnalysis("ReductionFoldingTailByMasking")
+              << "Cannot fold tail by masking in the presence of reductions.");
+    LLVM_DEBUG(dbgs() << "LV: Loop has reductions, cannot fold tail by "
+                      << "masking.\n");
+    return false;
+  }
+
+  // TODO: handle outside users when tail is folded by masking.
+  // Every user of every allowed exit value has to live inside the loop; the
+  // first user found outside the loop makes folding impossible.
+  for (auto *ExitValue : AllowedExit) {
+    for (User *Usr : ExitValue->users()) {
+      Instruction *UserInst = cast<Instruction>(Usr);
+      if (!TheLoop->contains(UserInst)) {
+        ORE->emit(
+            createMissedAnalysis("LiveOutFoldingTailByMasking")
+            << "Cannot fold tail by masking in the presence of live outs.");
+        LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop has an "
+                          << "outside user for : " << *UserInst << '\n');
+        return false;
+      }
+    }
+  }
+
+  // No pointer is assumed safe to read from or write to: this set is passed
+  // in empty.
+  SmallPtrSet<Value *, 8> NoSafePointers;
+
+  // All blocks are checked and marked for predication, including ones that
+  // would not ordinarily need it, such as the header block.
+  for (BasicBlock *Block : TheLoop->blocks()) {
+    if (blockCanBePredicated(Block, NoSafePointers))
+      continue;
+    ORE->emit(createMissedAnalysis("NoCFGForSelect", Block->getTerminator())
+              << "control flow cannot be substituted for a select");
+    LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as required.\n");
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
+  return true;
+}
+
} // namespace llvm