aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp69
1 files changed, 49 insertions, 20 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8e22b54f002d..055fbb00871f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6894,6 +6894,31 @@ protected:
};
} // namespace
+/// Returns the cost of the shuffle instructions with the given \p Kind, vector
+/// type \p Tp and optional \p Mask. Adds SLP-specific cost estimation for the
+/// insert subvector pattern.
+///
+/// Any \p Kind other than TTI::SK_PermuteTwoSrc is forwarded unchanged to
+/// TargetTransformInfo::getShuffleCost. For a two-source permute, a \p Mask
+/// with more than two elements that is recognized as an insert-subvector mask
+/// and satisfies both
+///   Index + NumSubElts > NumSrcElts   and
+///   Index + NumSrcElts <= Mask.size()
+/// is costed as TTI::SK_InsertSubvector of \p Tp into a fixed vector of
+/// Mask.size() elements. Every other case falls back to the plain TTI query.
+///
+/// \p CostKind is honoured on every path, including the insert-subvector one.
+/// \p Index, \p SubTp and \p Args are passed through to the underlying query.
+static InstructionCost
+getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
+               VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
+               TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+               int Index = 0, VectorType *SubTp = nullptr,
+               ArrayRef<const Value *> Args = std::nullopt) {
+  // Only two-source permutes get the SLP-specific treatment below.
+  if (Kind != TTI::SK_PermuteTwoSrc)
+    return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
+  int NumSrcElts = Tp->getElementCount().getKnownMinValue();
+  int NumSubElts = 0;
+  // NOTE(review): NumSubElts and Index appear to be out-parameters filled in by
+  // isInsertSubvectorMask on a successful match - confirm against its
+  // declaration. If so, the fallback query at the end may see the detected
+  // Index rather than the caller-supplied one.
+  if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
+                             Mask, NumSrcElts, NumSubElts, Index)) {
+    if (Index + NumSubElts > NumSrcElts &&
+        Index + NumSrcElts <= static_cast<int>(Mask.size()))
+      // Forward the caller's CostKind instead of hard-coding
+      // TCK_RecipThroughput so this path agrees with the other two returns.
+      return TTI.getShuffleCost(
+          TTI::SK_InsertSubvector,
+          FixedVectorType::get(Tp->getElementType(), Mask.size()), std::nullopt,
+          CostKind, Index, Tp);
+  }
+  return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
+}
+
/// Merges shuffle masks and emits final shuffle instruction, if required. It
/// supports shuffling of 2 input vectors. It implements lazy shuffles emission,
/// when the actual shuffle instruction is generated only if this is actually
@@ -7141,15 +7166,15 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
std::optional<TTI::ShuffleKind> RegShuffleKind =
CheckPerRegistersShuffle(SubMask);
if (!RegShuffleKind) {
- Cost += TTI.getShuffleCost(
- *ShuffleKinds[Part],
+ Cost += ::getShuffleCost(
+ TTI, *ShuffleKinds[Part],
FixedVectorType::get(VL.front()->getType(), NumElts), MaskSlice);
continue;
}
if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
!ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
- Cost += TTI.getShuffleCost(
- *RegShuffleKind,
+ Cost += ::getShuffleCost(
+ TTI, *RegShuffleKind,
FixedVectorType::get(VL.front()->getType(), EltsPerVector),
SubMask);
}
@@ -7222,8 +7247,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
if (isEmptyOrIdentity(Mask, VF))
return TTI::TCC_Free;
- return TTI.getShuffleCost(TTI::SK_PermuteTwoSrc,
- cast<VectorType>(V1->getType()), Mask);
+ return ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc,
+ cast<VectorType>(V1->getType()), Mask);
}
InstructionCost createShuffleVector(Value *V1, ArrayRef<int> Mask) const {
// Empty mask or identity mask are free.
@@ -8101,7 +8126,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I)
Mask[I] =
((I >= InMask.size()) || InMask.test(I)) ? PoisonMaskElem : I;
- Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
+ Cost +=
+ ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
}
}
return Cost;
@@ -8428,8 +8454,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return I->getOpcode() == E->getAltOpcode();
},
Mask);
- VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- FinalVecTy, Mask);
+ VecCost += ::getShuffleCost(TTIRef, TargetTransformInfo::SK_PermuteTwoSrc,
+ FinalVecTy, Mask);
// Patterns like [fadd,fsub] can be combined into a single instruction
// in x86. Reordering them into [fsub,fadd] blocks this pattern. So we
// need to take into account their order when looking for the most used
@@ -9133,7 +9159,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
auto *FTy =
FixedVectorType::get(TEs.back()->Scalars.front()->getType(), VF);
InstructionCost C =
- TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask);
+ ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, FTy, Mask);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of vector node and external "
"insertelement users.\n";
@@ -11991,8 +12017,12 @@ Value *BoUpSLP::vectorizeTree(
IRBuilder<>::InsertPointGuard Guard(Builder);
if (auto *IVec = dyn_cast<Instruction>(Vec))
Builder.SetInsertPoint(IVec->getNextNonDebugInstruction());
- Vec = Builder.CreateIntCast(Vec, VU->getType(),
- BWIt->second.second);
+ Vec = Builder.CreateIntCast(
+ Vec,
+ FixedVectorType::get(
+ cast<VectorType>(VU->getType())->getElementType(),
+ cast<FixedVectorType>(Vec->getType())->getNumElements()),
+ BWIt->second.second);
VectorCasts.try_emplace(Scalar, Vec);
} else {
Vec = VecIt->second;
@@ -13070,10 +13100,14 @@ bool BoUpSLP::collectValuesToDemote(
if (isa<Constant>(V))
return true;
- // If the value is not a vectorized instruction in the expression with only
- // one use, it cannot be demoted.
+ // The value cannot be demoted if it is not a vectorized instruction, is used
+ // in multiple vector nodes, was already visited, or is used only by
+ // insertelement instructions that are not part of the tree.
auto *I = dyn_cast<Instruction>(V);
- if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second)
+ if (!I || !getTreeEntry(I) || MultiNodeScalars.contains(I) ||
+ !Visited.insert(I).second || all_of(I->users(), [&](User *U) {
+ return isa<InsertElementInst>(U) && !getTreeEntry(U);
+ }))
return false;
unsigned Start = 0;
@@ -13144,11 +13178,6 @@ bool BoUpSLP::collectValuesToDemote(
}
void BoUpSLP::computeMinimumValueSizes() {
- // If there are no external uses, the expression tree must be rooted by a
- // store. We can't demote in-memory values, so there is nothing to do here.
- if (ExternalUses.empty())
- return;
-
// We only attempt to truncate integer expressions.
auto &TreeRoot = VectorizableTree[0]->Scalars;
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());