aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp74
1 files changed, 49 insertions, 25 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3f943f4c0688..23613775d896 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -13,14 +13,17 @@
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
-#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
using namespace llvm;
+using namespace PatternMatch;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
@@ -566,6 +569,28 @@ bool LoopVectorizationLegality::setupOuterLoopInductions() {
return false;
}
+/// Checks if a function is scalarizable according to the TLI, in
+/// the sense that it should be vectorized and then expanded in
+/// multiple scalarcalls. This is represented in the
+/// TLI via mappings that do not specify a vector name, as in the
+/// following example:
+///
+/// const VecDesc VecIntrinsics[] = {
+/// {"llvm.phx.abs.i32", "", 4}
+/// };
+static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
+ const StringRef ScalarName = CI.getCalledFunction()->getName();
+ bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
+ // Check that all known VFs are not associated to a vector
+ // function, i.e. the vector name is emty.
+ if (Scalarize)
+ for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
+ VF <= WidestVF; VF *= 2) {
+ Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
+ }
+ return Scalarize;
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *Header = TheLoop->getHeader();
@@ -644,6 +669,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
SinkAfter, DT)) {
+ AllowedExit.insert(Phi);
FirstOrderRecurrences.insert(Phi);
continue;
}
@@ -667,10 +693,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// * Have a mapping to an IR intrinsic.
// * Have a vector version available.
auto *CI = dyn_cast<CallInst>(&I);
+
if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
!isa<DbgInfoIntrinsic>(CI) &&
!(CI->getCalledFunction() && TLI &&
- TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
+ (!VFDatabase::getMappings(*CI).empty() ||
+ isTLIScalarize(*TLI, *CI)))) {
// If the call is a recognized math libary call, it is likely that
// we can vectorize it given loosened floating-point constraints.
LibFunc Func;
@@ -685,7 +713,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// but it's hard to provide meaningful yet generic advice.
// Also, should this be guarded by allowExtraAnalysis() and/or be part
// of the returned info from isFunctionVectorizable()?
- reportVectorizationFailure("Found a non-intrinsic callsite",
+ reportVectorizationFailure(
+ "Found a non-intrinsic callsite",
"library call cannot be vectorized. "
"Try compiling with -fno-math-errno, -ffast-math, "
"or similar flags",
@@ -739,11 +768,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// supported on the target.
if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
// Arbitrarily try a vector of 2 elements.
- Type *VecTy = VectorType::get(T, /*NumElements=*/2);
+ auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
assert(VecTy && "did not find vectorized version of stored type");
- const MaybeAlign Alignment = getLoadStoreAlignment(ST);
- assert(Alignment && "Alignment should be set");
- if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
+ if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
reportVectorizationFailure(
"nontemporal store instruction cannot be vectorized",
"nontemporal store instruction cannot be vectorized",
@@ -756,11 +783,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
// For nontemporal loads, check that a nontemporal vector version is
// supported on the target (arbitrarily try a vector of 2 elements).
- Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
+ auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
assert(VecTy && "did not find vectorized version of load type");
- const MaybeAlign Alignment = getLoadStoreAlignment(LD);
- assert(Alignment && "Alignment should be set");
- if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
+ if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
reportVectorizationFailure(
"nontemporal load instruction cannot be vectorized",
"nontemporal load instruction cannot be vectorized",
@@ -897,6 +922,14 @@ bool LoopVectorizationLegality::blockCanBePredicated(
if (C->canTrap())
return false;
}
+
+ // We can predicate blocks with calls to assume, as long as we drop them in
+ // case we flatten the CFG via predication.
+ if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
+ ConditionalAssumes.insert(&I);
+ continue;
+ }
+
// We might be able to hoist the load.
if (I.mayReadFromMemory()) {
auto *LI = dyn_cast<LoadInst>(&I);
@@ -947,14 +980,14 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// the memory pointed to can be dereferenced (with the access size implied by
// the value's type) unconditionally within the loop header without
// introducing a new fault.
- SmallPtrSet<Value *, 8> SafePointes;
+ SmallPtrSet<Value *, 8> SafePointers;
// Collect safe addresses.
for (BasicBlock *BB : TheLoop->blocks()) {
if (!blockNeedsPredication(BB)) {
for (Instruction &I : *BB)
if (auto *Ptr = getLoadStorePointerOperand(&I))
- SafePointes.insert(Ptr);
+ SafePointers.insert(Ptr);
continue;
}
@@ -968,7 +1001,7 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
LoadInst *LI = dyn_cast<LoadInst>(&I);
if (LI && !mustSuppressSpeculation(*LI) &&
isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
- SafePointes.insert(LI->getPointerOperand());
+ SafePointers.insert(LI->getPointerOperand());
}
}
@@ -986,7 +1019,7 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
- if (!blockCanBePredicated(BB, SafePointes)) {
+ if (!blockCanBePredicated(BB, SafePointers)) {
reportVectorizationFailure(
"Control flow cannot be substituted for a select",
"control flow cannot be substituted for a select",
@@ -1198,18 +1231,9 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
- if (!PrimaryInduction) {
- reportVectorizationFailure(
- "No primary induction, cannot fold tail by masking",
- "Missing a primary induction variable in the loop, which is "
- "needed in order to fold tail by masking as required.",
- "NoPrimaryInduction", ORE, TheLoop);
- return false;
- }
-
SmallPtrSet<const Value *, 8> ReductionLiveOuts;
- for (auto &Reduction : *getReductionVars())
+ for (auto &Reduction : getReductionVars())
ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
// TODO: handle non-reduction outside users when tail is folded by masking.