aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-10-23 17:51:42 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-10-23 17:51:42 +0000
commit1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
parente6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
downloadsrc-1d5ae1026e831016fc29fd927877c86af904481f.tar.gz
src-1d5ae1026e831016fc29fd927877c86af904481f.zip
Notes
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorizationLegality.cpp')
-rw-r--r--lib/Transforms/Vectorize/LoopVectorizationLegality.cpp186
1 files changed, 89 insertions, 97 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 6ef8dc2d3cd7..f43842be5357 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -13,7 +13,10 @@
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -47,38 +50,6 @@ static const unsigned MaxInterleaveFactor = 16;
namespace llvm {
-#ifndef NDEBUG
-static void debugVectorizationFailure(const StringRef DebugMsg,
- Instruction *I) {
- dbgs() << "LV: Not vectorizing: " << DebugMsg;
- if (I != nullptr)
- dbgs() << " " << *I;
- else
- dbgs() << '.';
- dbgs() << '\n';
-}
-#endif
-
-OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
- StringRef RemarkName,
- Loop *TheLoop,
- Instruction *I) {
- Value *CodeRegion = TheLoop->getHeader();
- DebugLoc DL = TheLoop->getStartLoc();
-
- if (I) {
- CodeRegion = I->getParent();
- // If there is no debug location attached to the instruction, revert back to
- // using the loop's.
- if (I->getDebugLoc())
- DL = I->getDebugLoc();
- }
-
- OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
- R << "loop not vectorized: ";
- return R;
-}
-
bool LoopVectorizeHints::Hint::validate(unsigned Val) {
switch (Kind) {
case HK_WIDTH:
@@ -88,6 +59,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
case HK_FORCE:
return (Val <= 1);
case HK_ISVECTORIZED:
+ case HK_PREDICATE:
return (Val == 0 || Val == 1);
}
return false;
@@ -99,7 +71,9 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
- IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
+ IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
+ Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
+ ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@@ -250,7 +224,7 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
return;
unsigned Val = C->getZExtValue();
- Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
+ Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
for (auto H : Hints) {
if (Name == H->Name) {
if (H->validate(Val))
@@ -435,7 +409,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
const ValueToValueMap &Strides =
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
- int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
+ bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
+ int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
@@ -445,14 +420,6 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
return LAI->isUniform(V);
}
-void LoopVectorizationLegality::reportVectorizationFailure(
- const StringRef DebugMsg, const StringRef OREMsg,
- const StringRef ORETag, Instruction *I) const {
- LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
- ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
- ORETag, TheLoop, I) << OREMsg);
-}
-
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
// Store the result and return it at the end instead of exiting early, in case
@@ -467,7 +434,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
if (!Br) {
reportVectorizationFailure("Unsupported basic block terminator",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -486,7 +453,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
!LI->isLoopHeader(Br->getSuccessor(1))) {
reportVectorizationFailure("Unsupported conditional branch",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -500,7 +467,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
TheLoop /*context outer loop*/)) {
reportVectorizationFailure("Outer loop contains divergent loops",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -511,7 +478,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
if (!setupOuterLoopInductions()) {
reportVectorizationFailure("Unsupported outer loop Phi(s)",
"Unsupported outer loop Phi(s)",
- "UnsupportedPhi");
+ "UnsupportedPhi", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -618,7 +585,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
!PhiTy->isPointerTy()) {
reportVectorizationFailure("Found a non-int non-pointer PHI",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
return false;
}
@@ -631,6 +598,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Unsafe cyclic dependencies with header phis are identified during
// legalization for reduction, induction and first order
// recurrences.
+ AllowedExit.insert(&I);
continue;
}
@@ -638,7 +606,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (Phi->getNumIncomingValues() != 2) {
reportVectorizationFailure("Found an invalid PHI",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", Phi);
+ "CFGNotUnderstood", ORE, TheLoop, Phi);
return false;
}
@@ -690,7 +658,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
- "NonReductionValueUsedOutsideLoop", Phi);
+ "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
return false;
} // end of PHI handling
@@ -721,11 +689,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
"library call cannot be vectorized. "
"Try compiling with -fno-math-errno, -ffast-math, "
"or similar flags",
- "CantVectorizeLibcall", CI);
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
} else {
reportVectorizationFailure("Found a non-intrinsic callsite",
"call instruction cannot be vectorized",
- "CantVectorizeLibcall", CI);
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
}
return false;
}
@@ -740,7 +708,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
reportVectorizationFailure("Found unvectorizable intrinsic",
"intrinsic instruction cannot be vectorized",
- "CantVectorizeIntrinsic", CI);
+ "CantVectorizeIntrinsic", ORE, TheLoop, CI);
return false;
}
}
@@ -753,7 +721,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
isa<ExtractElementInst>(I)) {
reportVectorizationFailure("Found unvectorizable type",
"instruction return type cannot be vectorized",
- "CantVectorizeInstructionReturnType", &I);
+ "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
return false;
}
@@ -763,7 +731,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!VectorType::isValidElementType(T)) {
reportVectorizationFailure("Store instruction cannot be vectorized",
"store instruction cannot be vectorized",
- "CantVectorizeStore", ST);
+ "CantVectorizeStore", ORE, TheLoop, ST);
return false;
}
@@ -773,12 +741,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Arbitrarily try a vector of 2 elements.
Type *VecTy = VectorType::get(T, /*NumElements=*/2);
assert(VecTy && "did not find vectorized version of stored type");
- unsigned Alignment = getLoadStoreAlignment(ST);
- if (!TTI->isLegalNTStore(VecTy, Alignment)) {
+ const MaybeAlign Alignment = getLoadStoreAlignment(ST);
+ assert(Alignment && "Alignment should be set");
+ if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
reportVectorizationFailure(
"nontemporal store instruction cannot be vectorized",
"nontemporal store instruction cannot be vectorized",
- "CantVectorizeNontemporalStore", ST);
+ "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
return false;
}
}
@@ -789,12 +758,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// supported on the target (arbitrarily try a vector of 2 elements).
Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
assert(VecTy && "did not find vectorized version of load type");
- unsigned Alignment = getLoadStoreAlignment(LD);
- if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
+ const MaybeAlign Alignment = getLoadStoreAlignment(LD);
+ assert(Alignment && "Alignment should be set");
+ if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
reportVectorizationFailure(
"nontemporal load instruction cannot be vectorized",
"nontemporal load instruction cannot be vectorized",
- "CantVectorizeNontemporalLoad", LD);
+ "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
return false;
}
}
@@ -823,7 +793,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
reportVectorizationFailure("Value cannot be used outside the loop",
"value cannot be used outside the loop",
- "ValueUsedOutsideLoop", &I);
+ "ValueUsedOutsideLoop", ORE, TheLoop, &I);
return false;
}
} // next instr.
@@ -833,12 +803,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (Inductions.empty()) {
reportVectorizationFailure("Did not find one integer induction var",
"loop induction variable could not be identified",
- "NoInductionVariable");
+ "NoInductionVariable", ORE, TheLoop);
return false;
} else if (!WidestIndTy) {
reportVectorizationFailure("Did not find one integer induction var",
"integer loop induction variable could not be identified",
- "NoIntegerInductionVariable");
+ "NoIntegerInductionVariable", ORE, TheLoop);
return false;
} else {
LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
@@ -869,7 +839,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
reportVectorizationFailure("Stores to a uniform address",
"write to a loop invariant address could not be vectorized",
- "CantVectorizeStoreToLoopInvariantAddress");
+ "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
return false;
}
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
@@ -905,7 +875,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
}
bool LoopVectorizationLegality::blockCanBePredicated(
- BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
+ BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
for (Instruction &I : *BB) {
@@ -924,7 +894,7 @@ bool LoopVectorizationLegality::blockCanBePredicated(
// !llvm.mem.parallel_loop_access implies if-conversion safety.
// Otherwise, record that the load needs (real or emulated) masking
// and let the cost model decide.
- if (!IsAnnotatedParallel)
+ if (!IsAnnotatedParallel || PreserveGuards)
MaskedOp.insert(LI);
continue;
}
@@ -953,23 +923,41 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion) {
reportVectorizationFailure("If-conversion is disabled",
"if-conversion is disabled",
- "IfConversionDisabled");
+ "IfConversionDisabled",
+ ORE, TheLoop);
return false;
}
assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
- // A list of pointers that we can safely read and write to.
+ // A list of pointers which are known to be dereferenceable within scope of
+ // the loop body for each iteration of the loop which executes. That is,
+ // the memory pointed to can be dereferenced (with the access size implied by
+ // the value's type) unconditionally within the loop header without
+ // introducing a new fault.
SmallPtrSet<Value *, 8> SafePointes;
// Collect safe addresses.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (blockNeedsPredication(BB))
+ if (!blockNeedsPredication(BB)) {
+ for (Instruction &I : *BB)
+ if (auto *Ptr = getLoadStorePointerOperand(&I))
+ SafePointes.insert(Ptr);
continue;
+ }
- for (Instruction &I : *BB)
- if (auto *Ptr = getLoadStorePointerOperand(&I))
- SafePointes.insert(Ptr);
+ // For a block which requires predication, an address may be safe to
+ // access in the loop without predication if we can prove dereferenceability
+ // facts sufficient to ensure it'll never fault within the loop. For the
+ // moment, we restrict this to loads; stores are more complicated due to
+ // concurrency restrictions.
+ ScalarEvolution &SE = *PSE.getSE();
+ for (Instruction &I : *BB) {
+ LoadInst *LI = dyn_cast<LoadInst>(&I);
+ if (LI && !mustSuppressSpeculation(*LI) &&
+ isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
+ SafePointes.insert(LI->getPointerOperand());
+ }
}
// Collect the blocks that need predication.
@@ -979,7 +967,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!isa<BranchInst>(BB->getTerminator())) {
reportVectorizationFailure("Loop contains a switch statement",
"loop contains a switch statement",
- "LoopContainsSwitch", BB->getTerminator());
+ "LoopContainsSwitch", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
@@ -989,14 +978,16 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
reportVectorizationFailure(
"Control flow cannot be substituted for a select",
"control flow cannot be substituted for a select",
- "NoCFGForSelect", BB->getTerminator());
+ "NoCFGForSelect", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
} else if (BB != Header && !canIfConvertPHINodes(BB)) {
reportVectorizationFailure(
"Control flow cannot be substituted for a select",
"control flow cannot be substituted for a select",
- "NoCFGForSelect", BB->getTerminator());
+ "NoCFGForSelect", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
}
@@ -1026,7 +1017,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
if (!Lp->getLoopPreheader()) {
reportVectorizationFailure("Loop doesn't have a legal pre-header",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1037,7 +1028,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
if (Lp->getNumBackEdges() != 1) {
reportVectorizationFailure("The loop must have a single backedge",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1048,7 +1039,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
if (!Lp->getExitingBlock()) {
reportVectorizationFailure("The loop must have an exiting block",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1061,7 +1052,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
reportVectorizationFailure("The exiting block is not the loop latch",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1124,7 +1115,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
if (!canVectorizeOuterLoop()) {
reportVectorizationFailure("Unsupported outer loop",
"unsupported outer loop",
- "UnsupportedOuterLoop");
+ "UnsupportedOuterLoop",
+ ORE, TheLoop);
// TODO: Implement DoExtraAnalysis when subsequent legal checks support
// outer loops.
return false;
@@ -1176,7 +1168,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
reportVectorizationFailure("Too many SCEV checks needed",
"Too many SCEV assumptions need to be made and checked at runtime",
- "TooManySCEVRunTimeChecks");
+ "TooManySCEVRunTimeChecks", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1190,7 +1182,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return Result;
}
-bool LoopVectorizationLegality::canFoldTailByMasking() {
+bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
@@ -1199,22 +1191,21 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
"No primary induction, cannot fold tail by masking",
"Missing a primary induction variable in the loop, which is "
"needed in order to fold tail by masking as required.",
- "NoPrimaryInduction");
+ "NoPrimaryInduction", ORE, TheLoop);
return false;
}
- // TODO: handle reductions when tail is folded by masking.
- if (!Reductions.empty()) {
- reportVectorizationFailure(
- "Loop has reductions, cannot fold tail by masking",
- "Cannot fold tail by masking in the presence of reductions.",
- "ReductionFoldingTailByMasking");
- return false;
- }
+ SmallPtrSet<const Value *, 8> ReductionLiveOuts;
- // TODO: handle outside users when tail is folded by masking.
+ for (auto &Reduction : *getReductionVars())
+ ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
+
+ // TODO: handle non-reduction outside users when tail is folded by masking.
for (auto *AE : AllowedExit) {
- // Check that all users of allowed exit values are inside the loop.
+ // Check that all users of allowed exit values are inside the loop or
+ // are the live-out of a reduction.
+ if (ReductionLiveOuts.count(AE))
+ continue;
for (User *U : AE->users()) {
Instruction *UI = cast<Instruction>(U);
if (TheLoop->contains(UI))
@@ -1222,7 +1213,7 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
reportVectorizationFailure(
"Cannot fold tail by masking, loop has an outside user for",
"Cannot fold tail by masking in the presence of live outs.",
- "LiveOutFoldingTailByMasking", UI);
+ "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
return false;
}
}
@@ -1233,11 +1224,12 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
// Check and mark all blocks for predication, including those that ordinarily
// do not need predication such as the header block.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (!blockCanBePredicated(BB, SafePointers)) {
+ if (!blockCanBePredicated(BB, SafePointers, /* PreserveGuards= */ true)) {
reportVectorizationFailure(
"Cannot fold tail by masking as required",
"control flow cannot be substituted for a select",
- "NoCFGForSelect", BB->getTerminator());
+ "NoCFGForSelect", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
}