src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2019-10-23 17:51:42 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2019-10-23 17:51:42 +0000
commit	1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree	2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
parent	e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
download	src-1d5ae1026e831016fc29fd927877c86af904481f.tar.gz src-1d5ae1026e831016fc29fd927877c86af904481f.zip

vendor/llvm/llvm-trunk-r375505 vendor/llvm

Notes

Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorizationLegality.cpp')

-rw-r--r--

lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

186

1 files changed, 89 insertions, 97 deletions

diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 6ef8dc2d3cd7..f43842be5357 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

@@ -13,7 +13,10 @@

// pass. It should be easy to create an analysis pass around it if there

// is a need (but D45420 needs to happen first).

+#include "llvm/Transforms/Vectorize/LoopVectorize.h"

#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"

+#include "llvm/Analysis/Loads.h"

+#include "llvm/Analysis/ValueTracking.h"

#include "llvm/Analysis/VectorUtils.h"

#include "llvm/IR/IntrinsicInst.h"

@@ -47,38 +50,6 @@ static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

-#ifndef NDEBUG

-static void debugVectorizationFailure(const StringRef DebugMsg,

- Instruction *I) {

- dbgs() << "LV: Not vectorizing: " << DebugMsg;

- if (I != nullptr)

- dbgs() << " " << *I;

- else

- dbgs() << '.';

- dbgs() << '\n';

-#endif

-OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,

- StringRef RemarkName,

- Loop *TheLoop,

- Instruction *I) {

- Value *CodeRegion = TheLoop->getHeader();

- DebugLoc DL = TheLoop->getStartLoc();

- if (I) {

- CodeRegion = I->getParent();

- // If there is no debug location attached to the instruction, revert back to

- // using the loop's.

- if (I->getDebugLoc())

- DL = I->getDebugLoc();

- }

- OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);

- R << "loop not vectorized: ";

- return R;

bool LoopVectorizeHints::Hint::validate(unsigned Val) {

switch (Kind) {

case HK_WIDTH:

@@ -88,6 +59,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {

case HK_FORCE:

return (Val <= 1);

case HK_ISVECTORIZED:

+ case HK_PREDICATE:

return (Val == 0 || Val == 1);

}

return false;

@@ -99,7 +71,9 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,

: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),

Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),

Force("vectorize.enable", FK_Undefined, HK_FORCE),

- IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {

+ IsVectorized("isvectorized", 0, HK_ISVECTORIZED),

+ Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),

+ ORE(ORE) {

// Populate values with existing loop metadata.

getHintsFromMetadata();

@@ -250,7 +224,7 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {

return;

unsigned Val = C->getZExtValue();

- Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};

+ Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};

for (auto H : Hints) {

if (Name == H->Name) {

if (H->validate(Val))

@@ -435,7 +409,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {

const ValueToValueMap &Strides =

getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();

- int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);

+ bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();

+ int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);

if (Stride == 1 || Stride == -1)

return Stride;

return 0;

@@ -445,14 +420,6 @@ bool LoopVectorizationLegality::isUniform(Value *V) {

return LAI->isUniform(V);

}

-void LoopVectorizationLegality::reportVectorizationFailure(

- const StringRef DebugMsg, const StringRef OREMsg,

- const StringRef ORETag, Instruction *I) const {

- LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));

- ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),

- ORETag, TheLoop, I) << OREMsg);

bool LoopVectorizationLegality::canVectorizeOuterLoop() {

assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");

// Store the result and return it at the end instead of exiting early, in case

@@ -467,7 +434,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {

if (!Br) {

reportVectorizationFailure("Unsupported basic block terminator",

"loop control flow is not understood by vectorizer",

- "CFGNotUnderstood");

+ "CFGNotUnderstood", ORE, TheLoop);

if (DoExtraAnalysis)

Result = false;

else

@@ -486,7 +453,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {

!LI->isLoopHeader(Br->getSuccessor(1))) {

reportVectorizationFailure("Unsupported conditional branch",

"loop control flow is not understood by vectorizer",

- "CFGNotUnderstood");

+ "CFGNotUnderstood", ORE, TheLoop);

if (DoExtraAnalysis)

Result = false;

else

@@ -500,7 +467,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {

TheLoop /*context outer loop*/)) {

reportVectorizationFailure("Outer loop contains divergent loops",

"loop control flow is not understood by vectorizer",

- "CFGNotUnderstood");

+ "CFGNotUnderstood", ORE, TheLoop);

if (DoExtraAnalysis)

Result = false;

else

@@ -511,7 +478,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {

if (!setupOuterLoopInductions()) {

reportVectorizationFailure("Unsupported outer loop Phi(s)",

"Unsupported outer loop Phi(s)",

- "UnsupportedPhi");

+ "UnsupportedPhi", ORE, TheLoop);

if (DoExtraAnalysis)

Result = false;

else

@@ -618,7 +585,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

!PhiTy->isPointerTy()) {

reportVectorizationFailure("Found a non-int non-pointer PHI",

"loop control flow is not understood by vectorizer",

- "CFGNotUnderstood");

+ "CFGNotUnderstood", ORE, TheLoop);

return false;

}

@@ -631,6 +598,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

// Unsafe cyclic dependencies with header phis are identified during

// legalization for reduction, induction and first order

// recurrences.

+ AllowedExit.insert(&I);

continue;

}

@@ -638,7 +606,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

if (Phi->getNumIncomingValues() != 2) {

reportVectorizationFailure("Found an invalid PHI",

"loop control flow is not understood by vectorizer",

- "CFGNotUnderstood", Phi);

+ "CFGNotUnderstood", ORE, TheLoop, Phi);

return false;

}

@@ -690,7 +658,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

reportVectorizationFailure("Found an unidentified PHI",

"value that could not be identified as "

"reduction is used outside the loop",

- "NonReductionValueUsedOutsideLoop", Phi);

+ "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);

return false;

} // end of PHI handling

@@ -721,11 +689,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

"library call cannot be vectorized. "

"Try compiling with -fno-math-errno, -ffast-math, "

"or similar flags",

- "CantVectorizeLibcall", CI);

+ "CantVectorizeLibcall", ORE, TheLoop, CI);

} else {

reportVectorizationFailure("Found a non-intrinsic callsite",

"call instruction cannot be vectorized",

- "CantVectorizeLibcall", CI);

+ "CantVectorizeLibcall", ORE, TheLoop, CI);

}

return false;

}

@@ -740,7 +708,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {

reportVectorizationFailure("Found unvectorizable intrinsic",

"intrinsic instruction cannot be vectorized",

- "CantVectorizeIntrinsic", CI);

+ "CantVectorizeIntrinsic", ORE, TheLoop, CI);

return false;

}

@@ -753,7 +721,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

isa<ExtractElementInst>(I)) {

reportVectorizationFailure("Found unvectorizable type",

"instruction return type cannot be vectorized",

- "CantVectorizeInstructionReturnType", &I);

+ "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);

return false;

}

@@ -763,7 +731,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

if (!VectorType::isValidElementType(T)) {

reportVectorizationFailure("Store instruction cannot be vectorized",

"store instruction cannot be vectorized",

- "CantVectorizeStore", ST);

+ "CantVectorizeStore", ORE, TheLoop, ST);

return false;

}

@@ -773,12 +741,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

// Arbitrarily try a vector of 2 elements.

Type *VecTy = VectorType::get(T, /*NumElements=*/2);

assert(VecTy && "did not find vectorized version of stored type");

- unsigned Alignment = getLoadStoreAlignment(ST);

- if (!TTI->isLegalNTStore(VecTy, Alignment)) {

+ const MaybeAlign Alignment = getLoadStoreAlignment(ST);

+ assert(Alignment && "Alignment should be set");

+ if (!TTI->isLegalNTStore(VecTy, *Alignment)) {

reportVectorizationFailure(

"nontemporal store instruction cannot be vectorized",

- "CantVectorizeNontemporalStore", ST);

+ "CantVectorizeNontemporalStore", ORE, TheLoop, ST);

return false;

}

@@ -789,12 +758,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

// supported on the target (arbitrarily try a vector of 2 elements).

Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);

assert(VecTy && "did not find vectorized version of load type");

- unsigned Alignment = getLoadStoreAlignment(LD);

- if (!TTI->isLegalNTLoad(VecTy, Alignment)) {

+ const MaybeAlign Alignment = getLoadStoreAlignment(LD);

+ assert(Alignment && "Alignment should be set");

+ if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {

reportVectorizationFailure(

"nontemporal load instruction cannot be vectorized",

- "CantVectorizeNontemporalLoad", LD);

+ "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);

return false;

}

@@ -823,7 +793,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

}

reportVectorizationFailure("Value cannot be used outside the loop",

"value cannot be used outside the loop",

- "ValueUsedOutsideLoop", &I);

+ "ValueUsedOutsideLoop", ORE, TheLoop, &I);

return false;

}

} // next instr.

@@ -833,12 +803,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

if (Inductions.empty()) {

reportVectorizationFailure("Did not find one integer induction var",

"loop induction variable could not be identified",

- "NoInductionVariable");

+ "NoInductionVariable", ORE, TheLoop);

return false;

} else if (!WidestIndTy) {

reportVectorizationFailure("Did not find one integer induction var",

"integer loop induction variable could not be identified",

- "NoIntegerInductionVariable");

+ "NoIntegerInductionVariable", ORE, TheLoop);

return false;

} else {

LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");

@@ -869,7 +839,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {

if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {

reportVectorizationFailure("Stores to a uniform address",

"write to a loop invariant address could not be vectorized",

- "CantVectorizeStoreToLoopInvariantAddress");

+ "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);

return false;

}

Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());

@@ -905,7 +875,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {

}

bool LoopVectorizationLegality::blockCanBePredicated(

- BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {

+ BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {

const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();

for (Instruction &I : *BB) {

@@ -924,7 +894,7 @@ bool LoopVectorizationLegality::blockCanBePredicated(

// !llvm.mem.parallel_loop_access implies if-conversion safety.

// Otherwise, record that the load needs (real or emulated) masking

// and let the cost model decide.

- if (!IsAnnotatedParallel)

+ if (!IsAnnotatedParallel || PreserveGuards)

MaskedOp.insert(LI);

continue;

}

@@ -953,23 +923,41 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {

if (!EnableIfConversion) {

reportVectorizationFailure("If-conversion is disabled",

"if-conversion is disabled",

- "IfConversionDisabled");

+ "IfConversionDisabled",

+ ORE, TheLoop);

return false;

}

assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

- // A list of pointers that we can safely read and write to.

+ // A list of pointers which are known to be dereferenceable within scope of

+ // the loop body for each iteration of the loop which executes. That is,

+ // the memory pointed to can be dereferenced (with the access size implied by

+ // the value's type) unconditionally within the loop header without

+ // introducing a new fault.

SmallPtrSet<Value *, 8> SafePointes;

// Collect safe addresses.

for (BasicBlock *BB : TheLoop->blocks()) {

- if (blockNeedsPredication(BB))

+ if (!blockNeedsPredication(BB)) {

+ for (Instruction &I : *BB)

+ if (auto *Ptr = getLoadStorePointerOperand(&I))

+ SafePointes.insert(Ptr);

continue;

+ }

- for (Instruction &I : *BB)

- if (auto *Ptr = getLoadStorePointerOperand(&I))

- SafePointes.insert(Ptr);

+ // For a block which requires predication, a address may be safe to access

+ // in the loop w/o predication if we can prove dereferenceability facts

+ // sufficient to ensure it'll never fault within the loop. For the moment,

+ // we restrict this to loads; stores are more complicated due to

+ // concurrency restrictions.

+ ScalarEvolution &SE = *PSE.getSE();

+ for (Instruction &I : *BB) {

+ LoadInst *LI = dyn_cast<LoadInst>(&I);

+ if (LI && !mustSuppressSpeculation(*LI) &&

+ isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))

+ SafePointes.insert(LI->getPointerOperand());

+ }

}

// Collect the blocks that need predication.

@@ -979,7 +967,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {

if (!isa<BranchInst>(BB->getTerminator())) {

reportVectorizationFailure("Loop contains a switch statement",

"loop contains a switch statement",

- "LoopContainsSwitch", BB->getTerminator());

+ "LoopContainsSwitch", ORE, TheLoop,

+ BB->getTerminator());

return false;

}

@@ -989,14 +978,16 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {

reportVectorizationFailure(

"Control flow cannot be substituted for a select",

"control flow cannot be substituted for a select",

- "NoCFGForSelect", BB->getTerminator());

+ "NoCFGForSelect", ORE, TheLoop,

+ BB->getTerminator());

return false;

}

} else if (BB != Header && !canIfConvertPHINodes(BB)) {

reportVectorizationFailure(

"Control flow cannot be substituted for a select",

"control flow cannot be substituted for a select",

- "NoCFGForSelect", BB->getTerminator());

+ "NoCFGForSelect", ORE, TheLoop,

+ BB->getTerminator());

return false;

}

@@ -1026,7 +1017,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,

if (!Lp->getLoopPreheader()) {

reportVectorizationFailure("Loop doesn't have a legal pre-header",

"loop control flow is not understood by vectorizer",

- "CFGNotUnderstood");

+ "CFGNotUnderstood", ORE, TheLoop);

if (DoExtraAnalysis)

Result = false;

else

@@ -1037,7 +1028,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,

if (Lp->getNumBackEdges() != 1) {

reportVectorizationFailure("The loop must have a single backedge",

"loop control flow is not understood by vectorizer",

- "CFGNotUnderstood");

+ "CFGNotUnderstood", ORE, TheLoop);

if (DoExtraAnalysis)

Result = false;

else

@@ -1048,7 +1039,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,

if (!Lp->getExitingBlock()) {

reportVectorizationFailure("The loop must have an exiting block",

"loop control flow is not understood by vectorizer",

- "CFGNotUnderstood");

+ "CFGNotUnderstood", ORE, TheLoop);

if (DoExtraAnalysis)

Result = false;

else

@@ -1061,7 +1052,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,

if (Lp->getExitingBlock() != Lp->getLoopLatch()) {

reportVectorizationFailure("The exiting block is not the loop latch",

"loop control flow is not understood by vectorizer",

- "CFGNotUnderstood");

+ "CFGNotUnderstood", ORE, TheLoop);

if (DoExtraAnalysis)

Result = false;

else

@@ -1124,7 +1115,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {

if (!canVectorizeOuterLoop()) {

reportVectorizationFailure("Unsupported outer loop",

"unsupported outer loop",

- "UnsupportedOuterLoop");

+ "UnsupportedOuterLoop",

+ ORE, TheLoop);

// TODO: Implement DoExtraAnalysis when subsequent legal checks support

// outer loops.

return false;

@@ -1176,7 +1168,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {

if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {

reportVectorizationFailure("Too many SCEV checks needed",

"Too many SCEV assumptions need to be made and checked at runtime",

- "TooManySCEVRunTimeChecks");

+ "TooManySCEVRunTimeChecks", ORE, TheLoop);

if (DoExtraAnalysis)

Result = false;

else

@@ -1190,7 +1182,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {

return Result;

}

-bool LoopVectorizationLegality::canFoldTailByMasking() {

+bool LoopVectorizationLegality::prepareToFoldTailByMasking() {

LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");

@@ -1199,22 +1191,21 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {

"No primary induction, cannot fold tail by masking",

"Missing a primary induction variable in the loop, which is "

"needed in order to fold tail by masking as required.",

- "NoPrimaryInduction");

+ "NoPrimaryInduction", ORE, TheLoop);

return false;

}

- // TODO: handle reductions when tail is folded by masking.

- if (!Reductions.empty()) {

- reportVectorizationFailure(

- "Loop has reductions, cannot fold tail by masking",

- "Cannot fold tail by masking in the presence of reductions.",

- "ReductionFoldingTailByMasking");

- return false;

- }

+ SmallPtrSet<const Value *, 8> ReductionLiveOuts;

- // TODO: handle outside users when tail is folded by masking.

+ for (auto &Reduction : *getReductionVars())

+ ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());

+ // TODO: handle non-reduction outside users when tail is folded by masking.

for (auto *AE : AllowedExit) {

- // Check that all users of allowed exit values are inside the loop.

+ // Check that all users of allowed exit values are inside the loop or

+ // are the live-out of a reduction.

+ if (ReductionLiveOuts.count(AE))

+ continue;

for (User *U : AE->users()) {

Instruction *UI = cast<Instruction>(U);

if (TheLoop->contains(UI))

@@ -1222,7 +1213,7 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {

reportVectorizationFailure(

"Cannot fold tail by masking, loop has an outside user for",

"Cannot fold tail by masking in the presence of live outs.",

- "LiveOutFoldingTailByMasking", UI);

+ "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);

return false;

}

@@ -1233,11 +1224,12 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {

// Check and mark all blocks for predication, including those that ordinarily

// do not need predication such as the header block.

for (BasicBlock *BB : TheLoop->blocks()) {

- if (!blockCanBePredicated(BB, SafePointers)) {

+ if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {

reportVectorizationFailure(

"Cannot fold tail by masking as required",

"control flow cannot be substituted for a select",

- "NoCFGForSelect", BB->getTerminator());

+ "NoCFGForSelect", ORE, TheLoop,

+ BB->getTerminator());

return false;

}