diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
commit | 1d5ae1026e831016fc29fd927877c86af904481f (patch) | |
tree | 2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | |
parent | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff) | |
download | src-1d5ae1026e831016fc29fd927877c86af904481f.tar.gz src-1d5ae1026e831016fc29fd927877c86af904481f.zip |
Notes
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorizationLegality.cpp')
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 186 |
1 files changed, 89 insertions, 97 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 6ef8dc2d3cd7..f43842be5357 100644 --- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -13,7 +13,10 @@ // pass. It should be easy to create an analysis pass around it if there // is a need (but D45420 needs to happen first). // +#include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IntrinsicInst.h" @@ -47,38 +50,6 @@ static const unsigned MaxInterleaveFactor = 16; namespace llvm { -#ifndef NDEBUG -static void debugVectorizationFailure(const StringRef DebugMsg, - Instruction *I) { - dbgs() << "LV: Not vectorizing: " << DebugMsg; - if (I != nullptr) - dbgs() << " " << *I; - else - dbgs() << '.'; - dbgs() << '\n'; -} -#endif - -OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, - StringRef RemarkName, - Loop *TheLoop, - Instruction *I) { - Value *CodeRegion = TheLoop->getHeader(); - DebugLoc DL = TheLoop->getStartLoc(); - - if (I) { - CodeRegion = I->getParent(); - // If there is no debug location attached to the instruction, revert back to - // using the loop's. - if (I->getDebugLoc()) - DL = I->getDebugLoc(); - } - - OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); - R << "loop not vectorized: "; - return R; -} - bool LoopVectorizeHints::Hint::validate(unsigned Val) { switch (Kind) { case HK_WIDTH: @@ -88,6 +59,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) { case HK_FORCE: return (Val <= 1); case HK_ISVECTORIZED: + case HK_PREDICATE: return (Val == 0 || Val == 1); } return false; @@ -99,7 +71,9 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), - IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) { + IsVectorized("isvectorized", 0, HK_ISVECTORIZED), + Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L), + ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); @@ -250,7 +224,7 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { return; unsigned Val = C->getZExtValue(); - Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized}; + Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; for (auto H : Hints) { if (Name == H->Name) { if (H->validate(Val)) @@ -435,7 +409,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); - int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); + bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); + int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); if (Stride == 1 || Stride == -1) return Stride; return 0; @@ -445,14 +420,6 @@ bool LoopVectorizationLegality::isUniform(Value *V) { return LAI->isUniform(V); } -void LoopVectorizationLegality::reportVectorizationFailure( - const StringRef DebugMsg, const StringRef OREMsg, - const StringRef ORETag, Instruction *I) const { - LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I)); - ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(), - ORETag, TheLoop, I) << OREMsg); -} - bool LoopVectorizationLegality::canVectorizeOuterLoop() { assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); // Store the result and return it at the end instead of exiting early, in case @@ -467,7 +434,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { if (!Br) { reportVectorizationFailure("Unsupported basic block terminator", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -486,7 +453,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { !LI->isLoopHeader(Br->getSuccessor(1))) { reportVectorizationFailure("Unsupported conditional branch", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -500,7 +467,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { TheLoop /*context outer loop*/)) { reportVectorizationFailure("Outer loop contains divergent loops", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -511,7 +478,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { if (!setupOuterLoopInductions()) { reportVectorizationFailure("Unsupported outer loop Phi(s)", "Unsupported outer loop Phi(s)", - "UnsupportedPhi"); + "UnsupportedPhi", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -618,7 +585,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { !PhiTy->isPointerTy()) { reportVectorizationFailure("Found a non-int non-pointer PHI", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); return false; } @@ -631,6 +598,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Unsafe cyclic dependencies with header phis are identified during // legalization for reduction, induction and first order // recurrences. + AllowedExit.insert(&I); continue; } @@ -638,7 +606,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (Phi->getNumIncomingValues() != 2) { reportVectorizationFailure("Found an invalid PHI", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood", Phi); + "CFGNotUnderstood", ORE, TheLoop, Phi); return false; } @@ -690,7 +658,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { reportVectorizationFailure("Found an unidentified PHI", "value that could not be identified as " "reduction is used outside the loop", - "NonReductionValueUsedOutsideLoop", Phi); + "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); return false; } // end of PHI handling @@ -721,11 +689,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { "library call cannot be vectorized. " "Try compiling with -fno-math-errno, -ffast-math, " "or similar flags", - "CantVectorizeLibcall", CI); + "CantVectorizeLibcall", ORE, TheLoop, CI); } else { reportVectorizationFailure("Found a non-intrinsic callsite", "call instruction cannot be vectorized", - "CantVectorizeLibcall", CI); + "CantVectorizeLibcall", ORE, TheLoop, CI); } return false; } @@ -740,7 +708,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { reportVectorizationFailure("Found unvectorizable intrinsic", "intrinsic instruction cannot be vectorized", - "CantVectorizeIntrinsic", CI); + "CantVectorizeIntrinsic", ORE, TheLoop, CI); return false; } } @@ -753,7 +721,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { isa<ExtractElementInst>(I)) { reportVectorizationFailure("Found unvectorizable type", "instruction return type cannot be vectorized", - "CantVectorizeInstructionReturnType", &I); + "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); return false; } @@ -763,7 +731,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!VectorType::isValidElementType(T)) { reportVectorizationFailure("Store instruction cannot be vectorized", "store instruction cannot be vectorized", - "CantVectorizeStore", ST); + "CantVectorizeStore", ORE, TheLoop, ST); return false; } @@ -773,12 +741,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Arbitrarily try a vector of 2 elements. Type *VecTy = VectorType::get(T, /*NumElements=*/2); assert(VecTy && "did not find vectorized version of stored type"); - unsigned Alignment = getLoadStoreAlignment(ST); - if (!TTI->isLegalNTStore(VecTy, Alignment)) { + const MaybeAlign Alignment = getLoadStoreAlignment(ST); + assert(Alignment && "Alignment should be set"); + if (!TTI->isLegalNTStore(VecTy, *Alignment)) { reportVectorizationFailure( "nontemporal store instruction cannot be vectorized", "nontemporal store instruction cannot be vectorized", - "CantVectorizeNontemporalStore", ST); + "CantVectorizeNontemporalStore", ORE, TheLoop, ST); return false; } } @@ -789,12 +758,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // supported on the target (arbitrarily try a vector of 2 elements). Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); assert(VecTy && "did not find vectorized version of load type"); - unsigned Alignment = getLoadStoreAlignment(LD); - if (!TTI->isLegalNTLoad(VecTy, Alignment)) { + const MaybeAlign Alignment = getLoadStoreAlignment(LD); + assert(Alignment && "Alignment should be set"); + if (!TTI->isLegalNTLoad(VecTy, *Alignment)) { reportVectorizationFailure( "nontemporal load instruction cannot be vectorized", "nontemporal load instruction cannot be vectorized", - "CantVectorizeNontemporalLoad", LD); + "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); return false; } } @@ -823,7 +793,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } reportVectorizationFailure("Value cannot be used outside the loop", "value cannot be used outside the loop", - "ValueUsedOutsideLoop", &I); + "ValueUsedOutsideLoop", ORE, TheLoop, &I); return false; } } // next instr. @@ -833,12 +803,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (Inductions.empty()) { reportVectorizationFailure("Did not find one integer induction var", "loop induction variable could not be identified", - "NoInductionVariable"); + "NoInductionVariable", ORE, TheLoop); return false; } else if (!WidestIndTy) { reportVectorizationFailure("Did not find one integer induction var", "integer loop induction variable could not be identified", - "NoIntegerInductionVariable"); + "NoIntegerInductionVariable", ORE, TheLoop); return false; } else { LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); @@ -869,7 +839,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { reportVectorizationFailure("Stores to a uniform address", "write to a loop invariant address could not be vectorized", - "CantVectorizeStoreToLoopInvariantAddress"); + "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); return false; } Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); @@ -905,7 +875,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { } bool LoopVectorizationLegality::blockCanBePredicated( - BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) { + BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); for (Instruction &I : *BB) { @@ -924,7 +894,7 @@ bool LoopVectorizationLegality::blockCanBePredicated( // !llvm.mem.parallel_loop_access implies if-conversion safety. // Otherwise, record that the load needs (real or emulated) masking // and let the cost model decide. - if (!IsAnnotatedParallel) + if (!IsAnnotatedParallel || PreserveGuards) MaskedOp.insert(LI); continue; } @@ -953,23 +923,41 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!EnableIfConversion) { reportVectorizationFailure("If-conversion is disabled", "if-conversion is disabled", - "IfConversionDisabled"); + "IfConversionDisabled", + ORE, TheLoop); return false; } assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); - // A list of pointers that we can safely read and write to. + // A list of pointers which are known to be dereferenceable within scope of + // the loop body for each iteration of the loop which executes. That is, + // the memory pointed to can be dereferenced (with the access size implied by + // the value's type) unconditionally within the loop header without + // introducing a new fault. SmallPtrSet<Value *, 8> SafePointes; // Collect safe addresses. for (BasicBlock *BB : TheLoop->blocks()) { - if (blockNeedsPredication(BB)) + if (!blockNeedsPredication(BB)) { + for (Instruction &I : *BB) + if (auto *Ptr = getLoadStorePointerOperand(&I)) + SafePointes.insert(Ptr); continue; + } - for (Instruction &I : *BB) - if (auto *Ptr = getLoadStorePointerOperand(&I)) - SafePointes.insert(Ptr); + // For a block which requires predication, a address may be safe to access + // in the loop w/o predication if we can prove dereferenceability facts + // sufficient to ensure it'll never fault within the loop. For the moment, + // we restrict this to loads; stores are more complicated due to + // concurrency restrictions. + ScalarEvolution &SE = *PSE.getSE(); + for (Instruction &I : *BB) { + LoadInst *LI = dyn_cast<LoadInst>(&I); + if (LI && !mustSuppressSpeculation(*LI) && + isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) + SafePointes.insert(LI->getPointerOperand()); + } } // Collect the blocks that need predication. @@ -979,7 +967,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!isa<BranchInst>(BB->getTerminator())) { reportVectorizationFailure("Loop contains a switch statement", "loop contains a switch statement", - "LoopContainsSwitch", BB->getTerminator()); + "LoopContainsSwitch", ORE, TheLoop, + BB->getTerminator()); return false; } @@ -989,14 +978,16 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { reportVectorizationFailure( "Control flow cannot be substituted for a select", "control flow cannot be substituted for a select", - "NoCFGForSelect", BB->getTerminator()); + "NoCFGForSelect", ORE, TheLoop, + BB->getTerminator()); return false; } } else if (BB != Header && !canIfConvertPHINodes(BB)) { reportVectorizationFailure( "Control flow cannot be substituted for a select", "control flow cannot be substituted for a select", - "NoCFGForSelect", BB->getTerminator()); + "NoCFGForSelect", ORE, TheLoop, + BB->getTerminator()); return false; } } @@ -1026,7 +1017,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (!Lp->getLoopPreheader()) { reportVectorizationFailure("Loop doesn't have a legal pre-header", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1037,7 +1028,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (Lp->getNumBackEdges() != 1) { reportVectorizationFailure("The loop must have a single backedge", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1048,7 +1039,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (!Lp->getExitingBlock()) { reportVectorizationFailure("The loop must have an exiting block", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1061,7 +1052,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (Lp->getExitingBlock() != Lp->getLoopLatch()) { reportVectorizationFailure("The exiting block is not the loop latch", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1124,7 +1115,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { if (!canVectorizeOuterLoop()) { reportVectorizationFailure("Unsupported outer loop", "unsupported outer loop", - "UnsupportedOuterLoop"); + "UnsupportedOuterLoop", + ORE, TheLoop); // TODO: Implement DoExtraAnalysis when subsequent legal checks support // outer loops. return false; @@ -1176,7 +1168,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { reportVectorizationFailure("Too many SCEV checks needed", "Too many SCEV assumptions need to be made and checked at runtime", - "TooManySCEVRunTimeChecks"); + "TooManySCEVRunTimeChecks", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1190,7 +1182,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { return Result; } -bool LoopVectorizationLegality::canFoldTailByMasking() { +bool LoopVectorizationLegality::prepareToFoldTailByMasking() { LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); @@ -1199,22 +1191,21 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { "No primary induction, cannot fold tail by masking", "Missing a primary induction variable in the loop, which is " "needed in order to fold tail by masking as required.", - "NoPrimaryInduction"); + "NoPrimaryInduction", ORE, TheLoop); return false; } - // TODO: handle reductions when tail is folded by masking. - if (!Reductions.empty()) { - reportVectorizationFailure( - "Loop has reductions, cannot fold tail by masking", - "Cannot fold tail by masking in the presence of reductions.", - "ReductionFoldingTailByMasking"); - return false; - } + SmallPtrSet<const Value *, 8> ReductionLiveOuts; - // TODO: handle outside users when tail is folded by masking. + for (auto &Reduction : *getReductionVars()) + ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); + + // TODO: handle non-reduction outside users when tail is folded by masking. for (auto *AE : AllowedExit) { - // Check that all users of allowed exit values are inside the loop. + // Check that all users of allowed exit values are inside the loop or + // are the live-out of a reduction. + if (ReductionLiveOuts.count(AE)) + continue; for (User *U : AE->users()) { Instruction *UI = cast<Instruction>(U); if (TheLoop->contains(UI)) @@ -1222,7 +1213,7 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { reportVectorizationFailure( "Cannot fold tail by masking, loop has an outside user for", "Cannot fold tail by masking in the presence of live outs.", - "LiveOutFoldingTailByMasking", UI); + "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); return false; } } @@ -1233,11 +1224,12 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { // Check and mark all blocks for predication, including those that ordinarily // do not need predication such as the header block. for (BasicBlock *BB : TheLoop->blocks()) { - if (!blockCanBePredicated(BB, SafePointers)) { + if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { reportVectorizationFailure( "Cannot fold tail by masking as required", "control flow cannot be substituted for a select", - "NoCFGForSelect", BB->getTerminator()); + "NoCFGForSelect", ORE, TheLoop, + BB->getTerminator()); return false; } } |