aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Transforms/Vectorize
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-04-26 11:23:24 +0000
committerDimitry Andric <dim@FreeBSD.org>2021-06-13 20:01:15 +0000
commitd409305fa3838fb39b38c26fc085fb729b8766d5 (patch)
treefd234b27775fb59a57266cf36a05ec916e79a85f /contrib/llvm-project/llvm/lib/Transforms/Vectorize
parente8d8bef961a50d4dc22501cde4fb9fb0be1b2532 (diff)
parentb4125f7d51da2bb55d3b850dba9a69c201c3422c (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize')
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp40
2 files changed, 23 insertions, 21 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 1795470fa58c..19797e6f7858 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -142,6 +142,10 @@ public:
return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
}
+ VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) {
+ return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal});
+ }
+
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ea0d7673edf6..b456a97aa4ec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) {
/// A helper function that returns true if the given type is irregular. The
/// type is irregular if its allocated size doesn't equal the store size of an
-/// element of the corresponding vector type at the given vectorization factor.
-static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
- // Determine if an array of VF elements of type Ty is "bitcast compatible"
- // with a <VF x Ty> vector.
- if (VF.isVector()) {
- auto *VectorTy = VectorType::get(Ty, VF);
- return TypeSize::get(VF.getKnownMinValue() *
- DL.getTypeAllocSize(Ty).getFixedValue(),
- VF.isScalable()) != DL.getTypeStoreSize(VectorTy);
- }
-
- // If the vectorization factor is one, we just check if an array of type Ty
- // requires padding between elements.
+/// element of the corresponding vector type.
+static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
+ // Determine if an array of N elements of type Ty is "bitcast compatible"
+ // with a <N x Ty> vector.
+ // This is only true if there is no padding between the array elements.
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
}
@@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
auto *ScalarTy = getMemInstValueType(I);
- if (hasIrregularType(ScalarTy, DL, VF))
+ if (hasIrregularType(ScalarTy, DL))
return false;
// Check if masking is required.
@@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
- if (hasIrregularType(ScalarTy, DL, VF))
+ if (hasIrregularType(ScalarTy, DL))
return false;
return true;
@@ -5504,11 +5496,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return None;
}
- ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
-
switch (ScalarEpilogueStatus) {
case CM_ScalarEpilogueAllowed:
- return MaxVF;
+ return computeFeasibleMaxVF(TC, UserVF);
case CM_ScalarEpilogueNotAllowedUsePredicate:
LLVM_FALLTHROUGH;
case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5546,7 +5536,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
"scalar epilogue instead.\n");
ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
- return MaxVF;
+ return computeFeasibleMaxVF(TC, UserVF);
}
return None;
}
@@ -5563,6 +5553,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
}
+ ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
assert(!MaxVF.isScalable() &&
"Scalable vectors do not yet support tail folding");
assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
@@ -8196,8 +8187,15 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
if (BI->getSuccessor(0) != Dst)
EdgeMask = Builder.createNot(EdgeMask);
- if (SrcMask) // Otherwise block in-mask is all-one, no need to AND.
- EdgeMask = Builder.createAnd(EdgeMask, SrcMask);
+ if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
+ // The condition is 'SrcMask && EdgeMask', which is equivalent to
+ // 'select i1 SrcMask, i1 EdgeMask, i1 false'.
+ // The select version does not introduce new UB if SrcMask is false and
+ // EdgeMask is poison. Using 'and' here introduces undefined behavior.
+ VPValue *False = Plan->getOrAddVPValue(
+ ConstantInt::getFalse(BI->getCondition()->getType()));
+ EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False);
+ }
return EdgeMaskCache[Edge] = EdgeMask;
}