path: root/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
author    Dimitry Andric <dim@FreeBSD.org>  2024-01-03 18:04:11 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2024-04-06 20:13:16 +0000
commit    647cbc5de815c5651677bf8582797f716ec7b48d (patch)
tree      0a57db146d82068137e0fe0109ca612aaef5afb6 /contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
parent    edc2dc17b1f2dfe45dc85e6cc0ff54bca1ac8214 (diff)
parent    77dbea07356e1ab2f37a777d4d1ddc5dd3e301c2 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  |  146
1 file changed, 72 insertions(+), 74 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 32913b3f5569..304991526064 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4925,36 +4925,34 @@ void BoUpSLP::buildExternalUses(
LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
Instruction *UserInst = dyn_cast<Instruction>(U);
- if (!UserInst)
+ if (!UserInst || isDeleted(UserInst))
continue;
- if (isDeleted(UserInst))
+ // Ignore users in the user ignore list.
+ if (UserIgnoreList && UserIgnoreList->contains(UserInst))
continue;
// Skip in-tree scalars that become vectors
if (TreeEntry *UseEntry = getTreeEntry(U)) {
- Value *UseScalar = UseEntry->Scalars[0];
// Some in-tree scalars will remain as scalar in vectorized
- // instructions. If that is the case, the one in Lane 0 will
+ // instructions. If that is the case, the one in FoundLane will
// be used.
- if (UseScalar != U ||
- UseEntry->State == TreeEntry::ScatterVectorize ||
+ if (UseEntry->State == TreeEntry::ScatterVectorize ||
UseEntry->State == TreeEntry::PossibleStridedVectorize ||
- !doesInTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
+ !doesInTreeUserNeedToExtract(
+ Scalar, cast<Instruction>(UseEntry->Scalars.front()), TLI)) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
continue;
}
+ U = nullptr;
}
- // Ignore users in the user ignore list.
- if (UserIgnoreList && UserIgnoreList->contains(UserInst))
- continue;
-
- LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
- << Lane << " from " << *Scalar << ".\n");
- ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane));
+ LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst
+ << " from lane " << Lane << " from " << *Scalar
+ << ".\n");
+ ExternalUses.emplace_back(Scalar, U, FoundLane);
}
}
}
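This first hunk reworks how buildExternalUses classifies users of an in-tree scalar: the extract check is now made against the first scalar of the user's tree entry, and an in-tree user that still needs the scalar form is recorded with a null user so a single extract can be emitted for it later. A minimal sketch of that convention, with heavily simplified hypothetical types (only the null-user convention and the first-scalar check mirror the real code):

#include <vector>

// Heavily simplified hypothetical types; only the null-user convention and
// the check against the use entry's first scalar mirror the real code.
struct ExtUse {
  void *Scalar;
  void *User;    // nullptr = in-tree user that keeps the scalar form
  unsigned Lane;
};

// Stand-in for doesInTreeUserNeedToExtract(): decides whether an in-tree
// user still consumes the scalar form of Scalar.
static bool needsScalarForm(void * /*Scalar*/, void * /*FirstUseScalar*/) {
  return true; // placeholder
}

static void recordUse(std::vector<ExtUse> &ExternalUses, void *Scalar,
                      void *User, void *FirstUseScalar, bool UserIsInTree,
                      unsigned FoundLane) {
  if (UserIsInTree) {
    if (!needsScalarForm(Scalar, FirstUseScalar))
      return;        // user is vectorized in-tree: no extract needed
    User = nullptr;  // extract once; duplicates are filtered during codegen
  }
  ExternalUses.push_back({Scalar, User, FoundLane});
}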
@@ -8384,6 +8382,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
(void)E;
return TTI->getInstructionCost(VI, CostKind);
};
+ // FIXME: Workaround for syntax error reported by MSVC buildbots.
+ TargetTransformInfo &TTIRef = *TTI;
// Need to clear CommonCost since the final shuffle cost is included into
// vector cost.
auto GetVectorCost = [&](InstructionCost) {
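The TTIRef reference introduced here exists only to placate MSVC: inside the nested generic lambdas below, spelling TTI->... through the implicitly captured `this` triggered a spurious syntax error on the MSVC buildbots. A self-contained illustration of the workaround pattern (all names hypothetical):

// Hypothetical self-contained illustration: bind the pointee to a local
// reference once and capture the reference in the nested lambdas, instead
// of writing Ptr->member(...) through the captured `this` inside them.
struct CostModel {
  int cost(int Opcode) const { return Opcode * 2; }
};

static int computeCost(CostModel *TTI) {
  CostModel &TTIRef = *TTI; // the lambdas below capture this reference
  auto GetVectorCost = [&](int Opcode) {
    return TTIRef.cost(Opcode) + TTIRef.cost(Opcode + 1);
  };
  return GetVectorCost(13);
}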
@@ -8398,14 +8398,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// No need to add new vector costs here since we're going to reuse
// same main/alternate vector ops, just do different shuffling.
} else if (Instruction::isBinaryOp(E->getOpcode())) {
- VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
+ VecCost =
+ TTIRef.getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost +=
- TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
+ TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
- VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
- CI0->getPredicate(), CostKind, VL0);
- VecCost += TTI->getCmpSelInstrCost(
+ VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
+ CI0->getPredicate(), CostKind, VL0);
+ VecCost += TTIRef.getCmpSelInstrCost(
E->getOpcode(), VecTy, MaskTy,
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
E->getAltOp());
@@ -8414,10 +8415,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
- VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
- TTI::CastContextHint::None, CostKind);
- VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
- TTI::CastContextHint::None, CostKind);
+ VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
+ TTI::CastContextHint::None, CostKind);
+ VecCost +=
+ TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
+ TTI::CastContextHint::None, CostKind);
}
SmallVector<int> Mask;
E->buildAltOpShuffleMask(
@@ -8426,8 +8428,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return I->getOpcode() == E->getAltOpcode();
},
Mask);
- VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- FinalVecTy, Mask);
+ VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ FinalVecTy, Mask);
+ // Patterns like [fadd,fsub] can be combined into a single instruction
+ // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we
+ // need to take into account their order when looking for the most used
+ // order.
+ unsigned Opcode0 = E->getOpcode();
+ unsigned Opcode1 = E->getAltOpcode();
+ // The opcode mask selects between the two opcodes.
+ SmallBitVector OpcodeMask(E->Scalars.size(), false);
+ for (unsigned Lane : seq<unsigned>(0, E->Scalars.size()))
+ if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1)
+ OpcodeMask.set(Lane);
+ // If this pattern is supported by the target then we consider the
+ // order.
+ if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
+ InstructionCost AltVecCost = TTIRef.getAltInstrCost(
+ VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
+ return AltVecCost < VecCost ? AltVecCost : VecCost;
+ }
+ // TODO: Check the reverse order too.
return VecCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
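The new OpcodeMask loop encodes one bit per lane, set wherever the lane uses the alternate opcode; for scalars [fadd, fsub, fadd, fsub] the mask is {0,1,0,1}, the alternating shape x86 can lower to a single addsub. A standalone sketch of building such a mask, using the real llvm::SmallBitVector and llvm::seq APIs on hypothetical input:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallBitVector.h"

// Build a per-lane mask with a set bit wherever the lane uses the alternate
// opcode, e.g. [fadd, fsub, fadd, fsub] with Opcode1 = fsub -> {0,1,0,1}.
static llvm::SmallBitVector
buildOpcodeMask(llvm::ArrayRef<unsigned> LaneOpcodes, unsigned Opcode1) {
  llvm::SmallBitVector OpcodeMask(LaneOpcodes.size(), false);
  for (unsigned Lane : llvm::seq<unsigned>(0, LaneOpcodes.size()))
    if (LaneOpcodes[Lane] == Opcode1)
      OpcodeMask.set(Lane);
  return OpcodeMask;
}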
@@ -11493,17 +11514,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Value *PO = LI->getPointerOperand();
if (E->State == TreeEntry::Vectorize) {
NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign());
-
- // The pointer operand uses an in-tree scalar so we add the new
- // LoadInst to ExternalUses list to make sure that an extract will
- // be generated in the future.
- if (isa<Instruction>(PO)) {
- if (TreeEntry *Entry = getTreeEntry(PO)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(PO);
- ExternalUses.emplace_back(PO, NewLI, FoundLane);
- }
- }
} else {
assert((E->State == TreeEntry::ScatterVectorize ||
E->State == TreeEntry::PossibleStridedVectorize) &&
@@ -11539,17 +11549,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
StoreInst *ST =
Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign());
- // The pointer operand uses an in-tree scalar, so add the new StoreInst to
- // ExternalUses to make sure that an extract will be generated in the
- // future.
- if (isa<Instruction>(Ptr)) {
- if (TreeEntry *Entry = getTreeEntry(Ptr)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(Ptr);
- ExternalUses.push_back(ExternalUser(Ptr, ST, FoundLane));
- }
- }
-
Value *V = propagateMetadata(ST, E->Scalars);
E->VectorizedValue = V;
@@ -11597,10 +11596,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E);
- Intrinsic::ID IID = Intrinsic::not_intrinsic;
- if (Function *FI = CI->getCalledFunction())
- IID = FI->getIntrinsicID();
-
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
@@ -11611,18 +11606,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
SmallVector<Value *> OpVecs;
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+ if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
TysForDecl.push_back(
FixedVectorType::get(CI->getType(), E->Scalars.size()));
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
- if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, I)) {
+ if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
CallInst *CEI = cast<CallInst>(VL0);
ScalarArg = CEI->getArgOperand(I);
OpVecs.push_back(CEI->getArgOperand(I));
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(ScalarArg->getType());
continue;
}
@@ -11634,7 +11629,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
}
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
+ if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(OpVec->getType());
}
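These hunks also switch the overload-type queries from IID, the scalar call's own intrinsic ID, to ID, the vector intrinsic selected by getVectorIntrinsicIDForCall, and guard them with UseIntrinsic so they are skipped when a vector library call is emitted instead. A condensed sketch of the guard, assuming the two-argument isVectorIntrinsicWithScalarOpAtArg declared in llvm/Analysis/VectorUtils.h at this revision:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"

// Condensed sketch: the query is keyed on the vector intrinsic ID chosen
// for the widened call, and is skipped entirely for vector library calls.
static bool argStaysScalar(bool UseIntrinsic, llvm::Intrinsic::ID ID,
                           unsigned ArgIdx) {
  return UseIntrinsic && llvm::isVectorIntrinsicWithScalarOpAtArg(ID, ArgIdx);
}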
@@ -11654,18 +11649,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
CI->getOperandBundlesAsDefs(OpBundles);
Value *V = Builder.CreateCall(CF, OpVecs, OpBundles);
- // The scalar argument uses an in-tree scalar so we add the new vectorized
- // call to ExternalUses list to make sure that an extract will be
- // generated in the future.
- if (isa_and_present<Instruction>(ScalarArg)) {
- if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
- ExternalUses.push_back(
- ExternalUser(ScalarArg, cast<User>(V), FoundLane));
- }
- }
-
propagateIRFlags(V, E->Scalars, VL0);
V = FinalShuffle(V, E, VecTy, IsSigned);
@@ -11877,6 +11860,7 @@ Value *BoUpSLP::vectorizeTree(
DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs;
SmallDenseSet<Value *, 4> UsedInserts;
DenseMap<Value *, Value *> VectorCasts;
+ SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
@@ -11947,13 +11931,27 @@ Value *BoUpSLP::vectorizeTree(
VectorToInsertElement.try_emplace(Vec, IE);
return Vec;
};
- // If User == nullptr, the Scalar is used as extra arg. Generate
- // ExtractElement instruction and update the record for this scalar in
- // ExternallyUsedValues.
+ // If User == nullptr, the Scalar remains as scalar in vectorized
+ // instructions or is used as extra arg. Generate ExtractElement instruction
+ // and update the record for this scalar in ExternallyUsedValues.
if (!User) {
- assert(ExternallyUsedValues.count(Scalar) &&
- "Scalar with nullptr as an external user must be registered in "
- "ExternallyUsedValues map");
+ if (!ScalarsWithNullptrUser.insert(Scalar).second)
+ continue;
+ assert((ExternallyUsedValues.count(Scalar) ||
+ any_of(Scalar->users(),
+ [&](llvm::User *U) {
+ TreeEntry *UseEntry = getTreeEntry(U);
+ return UseEntry &&
+ UseEntry->State == TreeEntry::Vectorize &&
+ E->State == TreeEntry::Vectorize &&
+ doesInTreeUserNeedToExtract(
+ Scalar,
+ cast<Instruction>(UseEntry->Scalars.front()),
+ TLI);
+ })) &&
+ "Scalar with nullptr User must be registered in "
+ "ExternallyUsedValues map or remain as scalar in vectorized "
+ "instructions");
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
if (auto *PHI = dyn_cast<PHINode>(VecI))
Builder.SetInsertPoint(PHI->getParent(),
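ScalarsWithNullptrUser makes the extraction loop tolerant of the duplicate null-user entries buildExternalUses can now produce: insert() reports whether the scalar was newly added, so only the first entry per scalar is processed. The idiom in isolation (hypothetical helper, real llvm::SmallDenseSet API):

#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Value.h"

// insert() returns {iterator, inserted}; the bool is false when the scalar
// was already seen, so each null-user scalar is extracted only once.
static bool shouldProcess(llvm::SmallDenseSet<llvm::Value *, 4> &Seen,
                          llvm::Value *Scalar) {
  return Seen.insert(Scalar).second;
}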
@@ -16222,7 +16220,7 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
for (auto *V : Candidates) {
auto *GEP = cast<GetElementPtrInst>(V);
auto *GEPIdx = GEP->idx_begin()->get();
- assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx));
+ assert(GEP->getNumIndices() == 1 && !isa<Constant>(GEPIdx));
Bundle[BundleIndex++] = GEPIdx;
}
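The final hunk strengthens the assert: each candidate GEP is expected to have exactly one index and that index must be non-constant. With the old ||, any single-index GEP satisfied the first clause, so a constant index was never actually rejected. A hypothetical standalone illustration of the tightened invariant:

#include <cassert>

// Both conditions must hold for every candidate, so the connective is &&:
// with ||, a one-index GEP passed the assert even when its index was a
// constant, which the candidate filter is supposed to exclude.
static void checkCandidate(unsigned NumIndices, bool IndexIsConstant) {
  assert(NumIndices == 1 && !IndexIsConstant && "bad GEP-index candidate");
  (void)NumIndices;
  (void)IndexIsConstant;
}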