author    | Dimitry Andric <dim@FreeBSD.org> | 2024-01-03 18:04:11 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:13:16 +0000
commit    | 647cbc5de815c5651677bf8582797f716ec7b48d (patch)
tree      | 0a57db146d82068137e0fe0109ca612aaef5afb6 /contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
parent    | edc2dc17b1f2dfe45dc85e6cc0ff54bca1ac8214 (diff)
parent    | 77dbea07356e1ab2f37a777d4d1ddc5dd3e301c2 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 146
1 file changed, 72 insertions, 74 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 32913b3f5569..304991526064 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4925,36 +4925,34 @@ void BoUpSLP::buildExternalUses(
         LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
 
         Instruction *UserInst = dyn_cast<Instruction>(U);
-        if (!UserInst)
+        if (!UserInst || isDeleted(UserInst))
           continue;
 
-        if (isDeleted(UserInst))
+        // Ignore users in the user ignore list.
+        if (UserIgnoreList && UserIgnoreList->contains(UserInst))
           continue;
 
         // Skip in-tree scalars that become vectors
         if (TreeEntry *UseEntry = getTreeEntry(U)) {
-          Value *UseScalar = UseEntry->Scalars[0];
           // Some in-tree scalars will remain as scalar in vectorized
-          // instructions. If that is the case, the one in Lane 0 will
+          // instructions. If that is the case, the one in FoundLane will
           // be used.
-          if (UseScalar != U ||
-              UseEntry->State == TreeEntry::ScatterVectorize ||
+          if (UseEntry->State == TreeEntry::ScatterVectorize ||
               UseEntry->State == TreeEntry::PossibleStridedVectorize ||
-              !doesInTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
+              !doesInTreeUserNeedToExtract(
+                  Scalar, cast<Instruction>(UseEntry->Scalars.front()), TLI)) {
            LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:"
                              << *U << ".\n");
            assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
            continue;
          }
+          U = nullptr;
        }
 
-        // Ignore users in the user ignore list.
-        if (UserIgnoreList && UserIgnoreList->contains(UserInst))
-          continue;
-
-        LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
-                          << Lane << " from " << *Scalar << ".\n");
-        ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane));
+        LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst
+                          << " from lane " << Lane << " from " << *Scalar
+                          << ".\n");
+        ExternalUses.emplace_back(Scalar, U, FoundLane);
       }
     }
   }
@@ -8384,6 +8382,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       (void)E;
       return TTI->getInstructionCost(VI, CostKind);
     };
+    // FIXME: Workaround for syntax error reported by MSVC buildbots.
+    TargetTransformInfo &TTIRef = *TTI;
     // Need to clear CommonCost since the final shuffle cost is included into
     // vector cost.
     auto GetVectorCost = [&](InstructionCost) {
@@ -8398,14 +8398,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
        // No need to add new vector costs here since we're going to reuse
        // same main/alternate vector ops, just do different shuffling.
      } else if (Instruction::isBinaryOp(E->getOpcode())) {
-        VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
+        VecCost =
+            TTIRef.getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
        VecCost +=
-            TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
+            TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
      } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
        auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
-        VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
-                                          CI0->getPredicate(), CostKind, VL0);
-        VecCost += TTI->getCmpSelInstrCost(
+        VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
+                                            CI0->getPredicate(), CostKind, VL0);
+        VecCost += TTIRef.getCmpSelInstrCost(
            E->getOpcode(), VecTy, MaskTy,
            cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
            E->getAltOp());
@@ -8414,10 +8415,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
        Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
        auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
        auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
-        VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
-                                        TTI::CastContextHint::None, CostKind);
-        VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
-                                         TTI::CastContextHint::None, CostKind);
+        VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
+                                          TTI::CastContextHint::None, CostKind);
+        VecCost +=
+            TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
+                                    TTI::CastContextHint::None, CostKind);
      }
      SmallVector<int> Mask;
      E->buildAltOpShuffleMask(
@@ -8426,8 +8428,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
            return I->getOpcode() == E->getAltOpcode();
          },
          Mask);
-      VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
-                                     FinalVecTy, Mask);
+      VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+                                       FinalVecTy, Mask);
+      // Patterns like [fadd,fsub] can be combined into a single instruction
+      // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we
+      // need to take into account their order when looking for the most used
+      // order.
+      unsigned Opcode0 = E->getOpcode();
+      unsigned Opcode1 = E->getAltOpcode();
+      // The opcode mask selects between the two opcodes.
+      SmallBitVector OpcodeMask(E->Scalars.size(), false);
+      for (unsigned Lane : seq<unsigned>(0, E->Scalars.size()))
+        if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1)
+          OpcodeMask.set(Lane);
+      // If this pattern is supported by the target then we consider the
+      // order.
+      if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
+        InstructionCost AltVecCost = TTIRef.getAltInstrCost(
+            VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
+        return AltVecCost < VecCost ? AltVecCost : VecCost;
+      }
+      // TODO: Check the reverse order too.
      return VecCost;
    };
    return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -11493,17 +11514,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
      Value *PO = LI->getPointerOperand();
      if (E->State == TreeEntry::Vectorize) {
        NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign());
-
-        // The pointer operand uses an in-tree scalar so we add the new
-        // LoadInst to ExternalUses list to make sure that an extract will
-        // be generated in the future.
-        if (isa<Instruction>(PO)) {
-          if (TreeEntry *Entry = getTreeEntry(PO)) {
-            // Find which lane we need to extract.
-            unsigned FoundLane = Entry->findLaneForValue(PO);
-            ExternalUses.emplace_back(PO, NewLI, FoundLane);
-          }
-        }
      } else {
        assert((E->State == TreeEntry::ScatterVectorize ||
                E->State == TreeEntry::PossibleStridedVectorize) &&
@@ -11539,17 +11549,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
      StoreInst *ST =
          Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign());
 
-      // The pointer operand uses an in-tree scalar, so add the new StoreInst to
-      // ExternalUses to make sure that an extract will be generated in the
-      // future.
-      if (isa<Instruction>(Ptr)) {
-        if (TreeEntry *Entry = getTreeEntry(Ptr)) {
-          // Find which lane we need to extract.
-          unsigned FoundLane = Entry->findLaneForValue(Ptr);
-          ExternalUses.push_back(ExternalUser(Ptr, ST, FoundLane));
-        }
-      }
-
      Value *V = propagateMetadata(ST, E->Scalars);
 
      E->VectorizedValue = V;
@@ -11597,10 +11596,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
      CallInst *CI = cast<CallInst>(VL0);
      setInsertPointAfterBundle(E);
 
-      Intrinsic::ID IID = Intrinsic::not_intrinsic;
-      if (Function *FI = CI->getCalledFunction())
-        IID = FI->getIntrinsicID();
-
      Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
      auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
@@ -11611,18 +11606,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
      SmallVector<Value *> OpVecs;
      SmallVector<Type *, 2> TysForDecl;
      // Add return type if intrinsic is overloaded on it.
-      if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+      if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
        TysForDecl.push_back(
            FixedVectorType::get(CI->getType(), E->Scalars.size()));
      for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
        ValueList OpVL;
        // Some intrinsics have scalar arguments. This argument should not be
        // vectorized.
-        if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, I)) {
+        if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
          CallInst *CEI = cast<CallInst>(VL0);
          ScalarArg = CEI->getArgOperand(I);
          OpVecs.push_back(CEI->getArgOperand(I));
-          if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
+          if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
            TysForDecl.push_back(ScalarArg->getType());
          continue;
        }
@@ -11634,7 +11629,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
        }
        LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
        OpVecs.push_back(OpVec);
-        if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
+        if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
          TysForDecl.push_back(OpVec->getType());
      }
 
@@ -11654,18 +11649,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
      CI->getOperandBundlesAsDefs(OpBundles);
      Value *V = Builder.CreateCall(CF, OpVecs, OpBundles);
 
-      // The scalar argument uses an in-tree scalar so we add the new vectorized
-      // call to ExternalUses list to make sure that an extract will be
-      // generated in the future.
-      if (isa_and_present<Instruction>(ScalarArg)) {
-        if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
-          // Find which lane we need to extract.
-          unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
-          ExternalUses.push_back(
-              ExternalUser(ScalarArg, cast<User>(V), FoundLane));
-        }
-      }
-
      propagateIRFlags(V, E->Scalars, VL0);
 
      V = FinalShuffle(V, E, VecTy, IsSigned);
@@ -11877,6 +11860,7 @@ Value *BoUpSLP::vectorizeTree(
  DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs;
  SmallDenseSet<Value *, 4> UsedInserts;
  DenseMap<Value *, Value *> VectorCasts;
+  SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
  // Extract all of the elements with the external uses.
  for (const auto &ExternalUse : ExternalUses) {
    Value *Scalar = ExternalUse.Scalar;
@@ -11947,13 +11931,27 @@ Value *BoUpSLP::vectorizeTree(
      VectorToInsertElement.try_emplace(Vec, IE);
      return Vec;
    };
-    // If User == nullptr, the Scalar is used as extra arg. Generate
-    // ExtractElement instruction and update the record for this scalar in
-    // ExternallyUsedValues.
+    // If User == nullptr, the Scalar remains as scalar in vectorized
+    // instructions or is used as extra arg. Generate ExtractElement instruction
+    // and update the record for this scalar in ExternallyUsedValues.
    if (!User) {
-      assert(ExternallyUsedValues.count(Scalar) &&
-             "Scalar with nullptr as an external user must be registered in "
-             "ExternallyUsedValues map");
+      if (!ScalarsWithNullptrUser.insert(Scalar).second)
+        continue;
+      assert((ExternallyUsedValues.count(Scalar) ||
+              any_of(Scalar->users(),
+                     [&](llvm::User *U) {
+                       TreeEntry *UseEntry = getTreeEntry(U);
+                       return UseEntry &&
+                              UseEntry->State == TreeEntry::Vectorize &&
+                              E->State == TreeEntry::Vectorize &&
+                              doesInTreeUserNeedToExtract(
+                                  Scalar,
+                                  cast<Instruction>(UseEntry->Scalars.front()),
+                                  TLI);
+                     })) &&
+             "Scalar with nullptr User must be registered in "
+             "ExternallyUsedValues map or remain as scalar in vectorized "
+             "instructions");
      if (auto *VecI = dyn_cast<Instruction>(Vec)) {
        if (auto *PHI = dyn_cast<PHINode>(VecI))
          Builder.SetInsertPoint(PHI->getParent(),
@@ -16222,7 +16220,7 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
    for (auto *V : Candidates) {
      auto *GEP = cast<GetElementPtrInst>(V);
      auto *GEPIdx = GEP->idx_begin()->get();
-      assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx));
+      assert(GEP->getNumIndices() == 1 && !isa<Constant>(GEPIdx));
      Bundle[BundleIndex++] = GEPIdx;
    }
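
The [fadd,fsub] comment in the @@ -8426,8 +8428,27 @@ hunk above is the core of the cost change: the new code builds a per-lane opcode mask over the bundle and only prefers the fused alternate-instruction cost when TTI::isLegalAltInstr accepts the pattern. The standalone C++ sketch below is illustrative only and is not part of the commit; the lane data, the IsLegalAltInstr lambda, and both cost numbers are made-up stand-ins for the real TargetTransformInfo hooks (isLegalAltInstr / getAltInstrCost).

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

enum class Opcode { FAdd, FSub };

int main() {
  // Lanes of a hypothetical alternate-opcode bundle (main = fadd, alt = fsub).
  std::vector<Opcode> Lanes = {Opcode::FAdd, Opcode::FSub, Opcode::FAdd,
                               Opcode::FSub};
  const Opcode Opcode1 = Opcode::FSub; // alternate opcode

  // One bit per lane, set when that lane uses the alternate opcode
  // (this mirrors the SmallBitVector OpcodeMask built in the patch).
  std::vector<bool> OpcodeMask(Lanes.size(), false);
  for (std::size_t Lane = 0; Lane != Lanes.size(); ++Lane)
    if (Lanes[Lane] == Opcode1)
      OpcodeMask[Lane] = true;

  // Stand-in for the target hook. A real target inspects the mask; x86, for
  // example, can emit a single addsub for a strictly alternating pattern.
  auto IsLegalAltInstr = [](const std::vector<bool> &Mask) {
    for (std::size_t I = 0; I != Mask.size(); ++I)
      if (Mask[I] != (I % 2 == 1))
        return false;
    return true;
  };
  const std::int64_t AltVecCost = 1; // fabricated single-instruction cost
  std::int64_t VecCost = 3;          // fabricated two-op + shuffle cost

  // Keep the cheaper of the generic estimate and the alternate-instruction
  // estimate, but only when the target supports the pattern.
  if (IsLegalAltInstr(OpcodeMask))
    VecCost = AltVecCost < VecCost ? AltVecCost : VecCost;

  std::cout << "chosen vector cost: " << VecCost << "\n";
  return 0;
}
```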