diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize')
8 files changed, 268 insertions, 166 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index cff72ae263d8..a7ebf78e54ce 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -148,26 +148,27 @@ public: VPInstruction *createOverflowingOp(unsigned Opcode, std::initializer_list<VPValue *> Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, - DebugLoc DL, const Twine &Name = "") { + DebugLoc DL = {}, const Twine &Name = "") { return tryInsertInstruction( new VPInstruction(Opcode, Operands, WrapFlags, DL, Name)); } - VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") { + VPValue *createNot(VPValue *Operand, DebugLoc DL = {}, + const Twine &Name = "") { return createInstruction(VPInstruction::Not, {Operand}, DL, Name); } - VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL, + VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {}, const Twine &Name = "") { return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name); } - VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL, + VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {}, const Twine &Name = "") { return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name); } VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, - DebugLoc DL, const Twine &Name = "", + DebugLoc DL = {}, const Twine &Name = "", std::optional<FastMathFlags> FMFs = std::nullopt) { auto *Select = FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9743fa0e7402..6ca93e15719f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2346,9 +2346,8 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue, auto *Offset = CreateMul(Index, Step); return CreateAdd(StartValue, Offset); } - case InductionDescriptor::IK_PtrInduction: { - return B.CreateGEP(B.getInt8Ty(), StartValue, CreateMul(Index, Step)); - } + case InductionDescriptor::IK_PtrInduction: + return B.CreatePtrAdd(StartValue, CreateMul(Index, Step)); case InductionDescriptor::IK_FpInduction: { assert(!isa<VectorType>(Index->getType()) && "Vector indices not supported for FP inductions yet"); @@ -6950,10 +6949,25 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, Op2Info.Kind = TargetTransformInfo::OK_UniformValue; SmallVector<const Value *, 4> Operands(I->operand_values()); - return TTI.getArithmeticInstrCost( + auto InstrCost = TTI.getArithmeticInstrCost( I->getOpcode(), VectorTy, CostKind, {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, Op2Info, Operands, I); + + // Some targets can replace frem with vector library calls. + InstructionCost VecCallCost = InstructionCost::getInvalid(); + if (I->getOpcode() == Instruction::FRem) { + LibFunc Func; + if (TLI->getLibFunc(I->getOpcode(), I->getType(), Func) && + TLI->isFunctionVectorizable(TLI->getName(Func), VF)) { + SmallVector<Type *, 4> OpTypes; + for (auto &Op : I->operands()) + OpTypes.push_back(Op->getType()); + VecCallCost = + TTI.getCallInstrCost(nullptr, VectorTy, OpTypes, CostKind); + } + } + return std::min(InstrCost, VecCallCost); } case Instruction::FNeg: { return TTI.getArithmeticInstrCost( @@ -8247,7 +8261,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, }, Range); if (ShouldUseVectorIntrinsic) - return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID); + return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID, + CI->getDebugLoc()); Function *Variant = nullptr; std::optional<unsigned> MaskPos; @@ -8300,7 +8315,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, } return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), - Intrinsic::not_intrinsic, Variant); + Intrinsic::not_intrinsic, CI->getDebugLoc(), + Variant); } return nullptr; @@ -8949,16 +8965,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( "AnyOf reductions are not allowed for in-loop reductions"); // Collect the chain of "link" recipes for the reduction starting at PhiR. - SetVector<VPRecipeBase *> Worklist; + SetVector<VPSingleDefRecipe *> Worklist; Worklist.insert(PhiR); for (unsigned I = 0; I != Worklist.size(); ++I) { - VPRecipeBase *Cur = Worklist[I]; - for (VPUser *U : Cur->getVPSingleValue()->users()) { - auto *UserRecipe = dyn_cast<VPRecipeBase>(U); - if (!UserRecipe) + VPSingleDefRecipe *Cur = Worklist[I]; + for (VPUser *U : Cur->users()) { + auto *UserRecipe = dyn_cast<VPSingleDefRecipe>(U); + if (!UserRecipe) { + assert(isa<VPLiveOut>(U) && + "U must either be a VPSingleDef or VPLiveOut"); continue; - assert(UserRecipe->getNumDefinedValues() == 1 && - "recipes must define exactly one result value"); + } Worklist.insert(UserRecipe); } } @@ -8968,10 +8985,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // (PreviousLink) to tell which of the two operands of a Link will remain // scalar and which will be reduced. For minmax by select(cmp), Link will be // the select instructions. - VPRecipeBase *PreviousLink = PhiR; // Aka Worklist[0]. - for (VPRecipeBase *CurrentLink : Worklist.getArrayRef().drop_front()) { - VPValue *PreviousLinkV = PreviousLink->getVPSingleValue(); - + VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0]. + for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) { Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr(); // Index of the first operand which holds a non-mask vector operand. @@ -8986,7 +9001,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( "Expected instruction to be a call to the llvm.fmuladd intrinsic"); assert(((MinVF.isScalar() && isa<VPReplicateRecipe>(CurrentLink)) || isa<VPWidenCallRecipe>(CurrentLink)) && - CurrentLink->getOperand(2) == PreviousLinkV && + CurrentLink->getOperand(2) == PreviousLink && "expected a call where the previous link is the added operand"); // If the instruction is a call to the llvm.fmuladd intrinsic then we @@ -9017,15 +9032,15 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // Note that for non-commutable operands (cmp-selects), the semantics of // the cmp-select are captured in the recurrence kind. unsigned VecOpId = - CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLinkV + CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink ? IndexOfFirstOperand + 1 : IndexOfFirstOperand; VecOp = CurrentLink->getOperand(VecOpId); - assert(VecOp != PreviousLinkV && + assert(VecOp != PreviousLink && CurrentLink->getOperand(CurrentLink->getNumOperands() - 1 - (VecOpId - IndexOfFirstOperand)) == - PreviousLinkV && - "PreviousLinkV must be the operand other than VecOp"); + PreviousLink && + "PreviousLink must be the operand other than VecOp"); } BasicBlock *BB = CurrentLinkI->getParent(); @@ -9037,19 +9052,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( } VPReductionRecipe *RedRecipe = new VPReductionRecipe( - RdxDesc, CurrentLinkI, PreviousLinkV, VecOp, CondOp); + RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp); // Append the recipe to the end of the VPBasicBlock because we need to // ensure that it comes after all of it's inputs, including CondOp. // Note that this transformation may leave over dead recipes (including // CurrentLink), which will be cleaned by a later VPlan transform. LinkVPBB->appendRecipe(RedRecipe); - CurrentLink->getVPSingleValue()->replaceAllUsesWith(RedRecipe); + CurrentLink->replaceAllUsesWith(RedRecipe); PreviousLink = RedRecipe; } } - Builder.setInsertPoint(&*LatchVPBB->begin()); - for (VPRecipeBase &R : - Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { + Builder.setInsertPoint(&*LatchVPBB->begin()); + for (VPRecipeBase &R : + Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R); if (!PhiR) continue; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 055fbb00871f..601d2454c1e1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7379,6 +7379,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { continue; if (Idx >= static_cast<int>(CommonVF)) Idx = E1Mask[Idx - CommonVF] + VF; + else + Idx = E1Mask[Idx]; } CommonVF = VF; } @@ -12986,8 +12988,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { for (ScheduleData *BundleMember = Picked; BundleMember; BundleMember = BundleMember->NextInBundle) { Instruction *PickedInst = BundleMember->Inst; - if (PickedInst->getNextNode() != LastScheduledInst) - PickedInst->moveBefore(LastScheduledInst); + if (PickedInst->getNextNonDebugInstruction() != LastScheduledInst) + PickedInst->moveAfter(LastScheduledInst->getPrevNode()); LastScheduledInst = PickedInst; } @@ -13181,7 +13183,7 @@ void BoUpSLP::computeMinimumValueSizes() { // We only attempt to truncate integer expressions. auto &TreeRoot = VectorizableTree[0]->Scalars; auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType()); - if (!TreeRootIT) + if (!TreeRootIT || VectorizableTree.front()->State == TreeEntry::NeedToGather) return; // Ensure the roots of the vectorizable tree don't form a cycle. @@ -14792,8 +14794,17 @@ public: LocalExternallyUsedValues[RdxVal]; // Update LocalExternallyUsedValues for the scalar, replaced by // extractelement instructions. + DenseMap<Value *, Value *> ReplacementToExternal; + for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) + ReplacementToExternal.try_emplace(Pair.second, Pair.first); for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) { - auto *It = ExternallyUsedValues.find(Pair.first); + Value *Ext = Pair.first; + auto RIt = ReplacementToExternal.find(Ext); + while (RIt != ReplacementToExternal.end()) { + Ext = RIt->second; + RIt = ReplacementToExternal.find(Ext); + } + auto *It = ExternallyUsedValues.find(Ext); if (It == ExternallyUsedValues.end()) continue; LocalExternallyUsedValues[Pair.second].append(It->second); @@ -15214,6 +15225,19 @@ private: assert(IsSupportedHorRdxIdentityOp && "The optimization of matched scalar identity horizontal reductions " "must be supported."); + auto *VTy = cast<FixedVectorType>(VectorizedValue->getType()); + if (VTy->getElementType() != VL.front()->getType()) { + VectorizedValue = Builder.CreateIntCast( + VectorizedValue, + FixedVectorType::get(VL.front()->getType(), VTy->getNumElements()), + any_of(VL, [&](Value *R) { + KnownBits Known = computeKnownBits( + R, cast<Instruction>(ReductionOps.front().front()) + ->getModule() + ->getDataLayout()); + return !Known.isNonNegative(); + })); + } switch (RdxKind) { case RecurKind::Add: { // root = mul prev_root, <1, 1, n, 1> @@ -16217,10 +16241,13 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { SetVector<Value *> Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, they are marked as deleted, so remove - // them from the set of candidates. - Candidates.remove_if( - [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); }); + // initially collected them or their index is optimized to constant value. + // If so, they are marked as deleted, so remove them from the set of + // candidates. + Candidates.remove_if([&R](Value *I) { + return R.isDeleted(cast<Instruction>(I)) || + isa<Constant>(cast<GetElementPtrInst>(I)->idx_begin()->get()); + }); // Remove from the set of candidates all pairs of getelementptrs with // constant differences. Such getelementptrs are likely not good diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp index b6e56c47c227..3eeb1a6948f2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1136,29 +1136,18 @@ void VPlanIngredient::print(raw_ostream &O) const { template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); void VPValue::replaceAllUsesWith(VPValue *New) { - if (this == New) - return; - for (unsigned J = 0; J < getNumUsers();) { - VPUser *User = Users[J]; - bool RemovedUser = false; - for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) - if (User->getOperand(I) == this) { - User->setOperand(I, New); - RemovedUser = true; - } - // If a user got removed after updating the current user, the next user to - // update will be moved to the current position, so we only need to - // increment the index if the number of users did not change. - if (!RemovedUser) - J++; - } + replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; }); } void VPValue::replaceUsesWithIf( VPValue *New, llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) { + // Note that this early exit is required for correctness; the implementation + // below relies on the number of users for this VPValue to decrease, which + // isn't the case if this == New. if (this == New) return; + for (unsigned J = 0; J < getNumUsers();) { VPUser *User = Users[J]; bool RemovedUser = false; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h index 4b4f4911eb64..0c6214868d84 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h @@ -12,10 +12,12 @@ /// VPBlockBase, together implementing a Hierarchical CFG; /// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained /// within VPBasicBlocks; -/// 3. VPInstruction, a concrete Recipe and VPUser modeling a single planned +/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that +/// also inherit from VPValue. +/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned /// instruction; -/// 4. The VPlan class holding a candidate for vectorization; -/// 5. The VPlanPrinter class providing a way to print a plan in dot format; +/// 5. The VPlan class holding a candidate for vectorization; +/// 6. The VPlanPrinter class providing a way to print a plan in dot format; /// These are documented in docs/VectorizationPlan.rst. // //===----------------------------------------------------------------------===// @@ -700,8 +702,8 @@ public: /// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. VPRecipeBase owns the VPValues it defines through VPDef /// and is responsible for deleting its defined values. Single-value -/// VPRecipeBases that also inherit from VPValue must make sure to inherit from -/// VPRecipeBase before VPValue. +/// recipes must inherit from VPSingleDef instead of inheriting from both +/// VPRecipeBase and VPValue separately. class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>, public VPDef, public VPUser { @@ -762,15 +764,6 @@ public: /// \returns an iterator pointing to the element after the erased one iplist<VPRecipeBase>::iterator eraseFromParent(); - /// Returns the underlying instruction, if the recipe is a VPValue or nullptr - /// otherwise. - Instruction *getUnderlyingInstr() { - return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()); - } - const Instruction *getUnderlyingInstr() const { - return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()); - } - /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPDef *D) { // All VPDefs are also VPRecipeBases. @@ -819,10 +812,80 @@ public: } \ static inline bool classof(const VPRecipeBase *R) { \ return R->getVPDefID() == VPDefID; \ + } \ + static inline bool classof(const VPSingleDefRecipe *R) { \ + return R->getVPDefID() == VPDefID; \ } +/// VPSingleDef is a base class for recipes for modeling a sequence of one or +/// more output IR that define a single result VPValue. +/// Note that VPRecipeBase must be inherited from before VPValue. +class VPSingleDefRecipe : public VPRecipeBase, public VPValue { +public: + template <typename IterT> + VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {}) + : VPRecipeBase(SC, Operands, DL), VPValue(this) {} + + VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands, + DebugLoc DL = {}) + : VPRecipeBase(SC, Operands, DL), VPValue(this) {} + + template <typename IterT> + VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, + DebugLoc DL = {}) + : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {} + + static inline bool classof(const VPRecipeBase *R) { + switch (R->getVPDefID()) { + case VPRecipeBase::VPDerivedIVSC: + case VPRecipeBase::VPExpandSCEVSC: + case VPRecipeBase::VPInstructionSC: + case VPRecipeBase::VPReductionSC: + case VPRecipeBase::VPReplicateSC: + case VPRecipeBase::VPScalarIVStepsSC: + case VPRecipeBase::VPVectorPointerSC: + case VPRecipeBase::VPWidenCallSC: + case VPRecipeBase::VPWidenCanonicalIVSC: + case VPRecipeBase::VPWidenCastSC: + case VPRecipeBase::VPWidenGEPSC: + case VPRecipeBase::VPWidenSC: + case VPRecipeBase::VPWidenSelectSC: + case VPRecipeBase::VPBlendSC: + case VPRecipeBase::VPPredInstPHISC: + case VPRecipeBase::VPCanonicalIVPHISC: + case VPRecipeBase::VPActiveLaneMaskPHISC: + case VPRecipeBase::VPFirstOrderRecurrencePHISC: + case VPRecipeBase::VPWidenPHISC: + case VPRecipeBase::VPWidenIntOrFpInductionSC: + case VPRecipeBase::VPWidenPointerInductionSC: + case VPRecipeBase::VPReductionPHISC: + return true; + case VPRecipeBase::VPInterleaveSC: + case VPRecipeBase::VPBranchOnMaskSC: + case VPRecipeBase::VPWidenMemoryInstructionSC: + // TODO: Widened stores don't define a value, but widened loads do. Split + // the recipes to be able to make widened loads VPSingleDefRecipes. + return false; + } + llvm_unreachable("Unhandled VPDefID"); + } + + static inline bool classof(const VPUser *U) { + auto *R = dyn_cast<VPRecipeBase>(U); + return R && classof(R); + } + + /// Returns the underlying instruction. + Instruction *getUnderlyingInstr() { + return cast<Instruction>(getUnderlyingValue()); + } + const Instruction *getUnderlyingInstr() const { + return cast<Instruction>(getUnderlyingValue()); + } +}; + /// Class to record LLVM IR flag for a recipe along with it. -class VPRecipeWithIRFlags : public VPRecipeBase { +class VPRecipeWithIRFlags : public VPSingleDefRecipe { enum class OperationType : unsigned char { Cmp, OverflowingBinOp, @@ -886,14 +949,14 @@ private: public: template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL) { + : VPSingleDefRecipe(SC, Operands, DL) { OpType = OperationType::Other; AllFlags = 0; } template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I) - : VPRecipeWithIRFlags(SC, Operands, I.getDebugLoc()) { + : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) { if (auto *Op = dyn_cast<CmpInst>(&I)) { OpType = OperationType::Cmp; CmpPredicate = Op->getPredicate(); @@ -915,32 +978,35 @@ public: } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) { OpType = OperationType::FPMathOp; FMFs = Op->getFastMathFlags(); + } else { + OpType = OperationType::Other; + AllFlags = 0; } } template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL), OpType(OperationType::Cmp), + : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp), CmpPredicate(Pred) {} template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL), OpType(OperationType::OverflowingBinOp), - WrapFlags(WrapFlags) {} + : VPSingleDefRecipe(SC, Operands, DL), + OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {} template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL), OpType(OperationType::FPMathOp), + : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp), FMFs(FMFs) {} protected: template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPFlagsTy GEPFlags, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL), OpType(OperationType::GEPOp), + : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {} public: @@ -1056,7 +1122,7 @@ public: /// While as any Recipe it may generate a sequence of IR instructions when /// executed, these instructions would always form a single-def expression as /// the VPInstruction is also a single def-use vertex. -class VPInstruction : public VPRecipeWithIRFlags, public VPValue { +class VPInstruction : public VPRecipeWithIRFlags { friend class VPlanSlp; public: @@ -1103,7 +1169,7 @@ public: VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "") : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL), - VPValue(this), Opcode(Opcode), Name(Name.str()) {} + Opcode(Opcode), Name(Name.str()) {} VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, DebugLoc DL = {}, const Twine &Name = "") @@ -1115,7 +1181,7 @@ public: VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "") : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL), - VPValue(this), Opcode(Opcode), Name(Name.str()) {} + Opcode(Opcode), Name(Name.str()) {} VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = ""); @@ -1205,13 +1271,13 @@ public: /// VPWidenRecipe is a recipe for producing a copy of vector type its /// ingredient. This recipe covers most of the traditional vectorization cases /// where each ingredient transforms into a vectorized version of itself. -class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPWidenRecipe : public VPRecipeWithIRFlags { unsigned Opcode; public: template <typename IterT> VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands) - : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPValue(this, &I), + : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), Opcode(I.getOpcode()) {} ~VPWidenRecipe() override = default; @@ -1231,7 +1297,7 @@ public: }; /// VPWidenCastRecipe is a recipe to create vector cast instructions. -class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPWidenCastRecipe : public VPRecipeWithIRFlags { /// Cast instruction opcode. Instruction::CastOps Opcode; @@ -1241,8 +1307,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue { public: VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPValue(this, &UI), - Opcode(Opcode), ResultTy(ResultTy) { + : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode), + ResultTy(ResultTy) { assert(UI.getOpcode() == Opcode && "opcode of underlying cast doesn't match"); assert(UI.getType() == ResultTy && @@ -1250,8 +1316,8 @@ public: } VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), VPValue(this, nullptr), - Opcode(Opcode), ResultTy(ResultTy) {} + : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode), + ResultTy(ResultTy) {} ~VPWidenCastRecipe() override = default; @@ -1273,7 +1339,7 @@ public: }; /// A recipe for widening Call instructions. -class VPWidenCallRecipe : public VPRecipeBase, public VPValue { +class VPWidenCallRecipe : public VPSingleDefRecipe { /// ID of the vector intrinsic to call when widening the call. If set the /// Intrinsic::not_intrinsic, a library call will be used instead. Intrinsic::ID VectorIntrinsicID; @@ -1286,9 +1352,9 @@ class VPWidenCallRecipe : public VPRecipeBase, public VPValue { public: template <typename IterT> VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments, - Intrinsic::ID VectorIntrinsicID, + Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {}, Function *Variant = nullptr) - : VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I), + : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, &I, DL), VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {} ~VPWidenCallRecipe() override = default; @@ -1306,11 +1372,11 @@ public: }; /// A recipe for widening select instructions. -struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue { +struct VPWidenSelectRecipe : public VPSingleDefRecipe { template <typename IterT> VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands) - : VPRecipeBase(VPDef::VPWidenSelectSC, Operands, I.getDebugLoc()), - VPValue(this, &I) {} + : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I, + I.getDebugLoc()) {} ~VPWidenSelectRecipe() override = default; @@ -1335,7 +1401,7 @@ struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue { }; /// A recipe for handling GEP instructions. -class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPWidenGEPRecipe : public VPRecipeWithIRFlags { bool isPointerLoopInvariant() const { return getOperand(0)->isDefinedOutsideVectorRegions(); } @@ -1353,8 +1419,7 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue { public: template <typename IterT> VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands) - : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP), - VPValue(this, GEP) {} + : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {} ~VPWidenGEPRecipe() override = default; @@ -1373,7 +1438,7 @@ public: /// A recipe to compute the pointers for widened memory accesses of IndexTy for /// all parts. If IsReverse is true, compute pointers for accessing the input in /// reverse order per part. -class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPVectorPointerRecipe : public VPRecipeWithIRFlags { Type *IndexedTy; bool IsReverse; @@ -1382,7 +1447,7 @@ public: bool IsInBounds, DebugLoc DL) : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr), GEPFlagsTy(IsInBounds), DL), - VPValue(this), IndexedTy(IndexedTy), IsReverse(IsReverse) {} + IndexedTy(IndexedTy), IsReverse(IsReverse) {} VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) @@ -1424,11 +1489,11 @@ public: /// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a /// pointer induction. Produces either a vector PHI per-part or scalar values /// per-lane based on the canonical induction. -class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue { +class VPHeaderPHIRecipe : public VPSingleDefRecipe { protected: VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start = nullptr, DebugLoc DL = {}) - : VPRecipeBase(VPDefID, {}, DL), VPValue(this, UnderlyingInstr) { + : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) { if (Start) addOperand(Start); } @@ -1709,14 +1774,13 @@ public: /// A recipe for vectorizing a phi-node as a sequence of mask-based select /// instructions. -class VPBlendRecipe : public VPRecipeBase, public VPValue { +class VPBlendRecipe : public VPSingleDefRecipe { public: /// The blend operation is a User of the incoming values and of their /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value /// might be incoming with a full mask for which there is no VPValue. VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands) - : VPRecipeBase(VPDef::VPBlendSC, Operands, Phi->getDebugLoc()), - VPValue(this, Phi) { + : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) { assert(Operands.size() > 0 && ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && "Expected either a single incoming value or a positive even number " @@ -1843,14 +1907,15 @@ public: /// A recipe to represent inloop reduction operations, performing a reduction on /// a vector operand into a scalar value, and adding the result to a chain. /// The Operands are {ChainOp, VecOp, [Condition]}. -class VPReductionRecipe : public VPRecipeBase, public VPValue { +class VPReductionRecipe : public VPSingleDefRecipe { /// The recurrence decriptor for the reduction in question. const RecurrenceDescriptor &RdxDesc; public: VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp) - : VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I), + : VPSingleDefRecipe(VPDef::VPReductionSC, + ArrayRef<VPValue *>({ChainOp, VecOp}), I), RdxDesc(R) { if (CondOp) addOperand(CondOp); @@ -1883,7 +1948,7 @@ public: /// copies of the original scalar type, one per lane, instead of producing a /// single copy of widened type for all lanes. If the instruction is known to be /// uniform only one copy, per lane zero, will be generated. -class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPReplicateRecipe : public VPRecipeWithIRFlags { /// Indicator if only a single replica per lane is needed. bool IsUniform; @@ -1895,7 +1960,7 @@ public: VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands, bool IsUniform, VPValue *Mask = nullptr) : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I), - VPValue(this, I), IsUniform(IsUniform), IsPredicated(Mask) { + IsUniform(IsUniform), IsPredicated(Mask) { if (Mask) addOperand(Mask); } @@ -1993,12 +2058,12 @@ public: /// order to merge values that are set under such a branch and feed their uses. /// The phi nodes can be scalar or vector depending on the users of the value. /// This recipe works in concert with VPBranchOnMaskRecipe. -class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue { +class VPPredInstPHIRecipe : public VPSingleDefRecipe { public: /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(VPValue *PredV) - : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {} + : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {} ~VPPredInstPHIRecipe() override = default; VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC) @@ -2119,14 +2184,13 @@ public: }; /// Recipe to expand a SCEV expression. -class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue { +class VPExpandSCEVRecipe : public VPSingleDefRecipe { const SCEV *Expr; ScalarEvolution &SE; public: VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE) - : VPRecipeBase(VPDef::VPExpandSCEVSC, {}), VPValue(this), Expr(Expr), - SE(SE) {} + : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {} ~VPExpandSCEVRecipe() override = default; @@ -2225,11 +2289,10 @@ public: }; /// A Recipe for widening the canonical induction variable of the vector loop. -class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { +class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe { public: VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV) - : VPRecipeBase(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}), - VPValue(this) {} + : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {} ~VPWidenCanonicalIVRecipe() override = default; @@ -2256,7 +2319,7 @@ public: /// A recipe for converting the canonical IV value to the corresponding value of /// an IV with different start and step values, using Start + CanonicalIV * /// Step. -class VPDerivedIVRecipe : public VPRecipeBase, public VPValue { +class VPDerivedIVRecipe : public VPSingleDefRecipe { /// If not nullptr, the result of the induction will get truncated to /// TruncResultTy. Type *TruncResultTy; @@ -2271,8 +2334,8 @@ public: VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, Type *TruncResultTy) - : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), - VPValue(this), TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()), + : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), + TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()), FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) { } @@ -2309,7 +2372,7 @@ public: /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their scalar values. -class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags { Instruction::BinaryOps InductionOpcode; public: @@ -2317,7 +2380,7 @@ public: Instruction::BinaryOps Opcode, FastMathFlags FMFs) : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC, ArrayRef<VPValue *>({IV, Step}), FMFs), - VPValue(this), InductionOpcode(Opcode) {} + InductionOpcode(Opcode) {} VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index f950d4740e41..94456bf858d9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -283,7 +283,7 @@ VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) { void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB) { VPIRBuilder.setInsertPoint(VPBB); - for (Instruction &InstRef : *BB) { + for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) { Instruction *Inst = &InstRef; // There shouldn't be any VPValue for Inst at this point. Otherwise, we diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1f844bce2310..bbeb5da2cfec 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -258,7 +258,7 @@ VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, const Twine &Name) : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}), Pred, DL), - VPValue(this), Opcode(Opcode), Name(Name.str()) { + Opcode(Opcode), Name(Name.str()) { assert(Opcode == Instruction::ICmp && "only ICmp predicates supported at the moment"); } @@ -267,7 +267,7 @@ VPInstruction::VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, FastMathFlags FMFs, DebugLoc DL, const Twine &Name) : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL), - VPValue(this), Opcode(Opcode), Name(Name.str()) { + Opcode(Opcode), Name(Name.str()) { // Make sure the VPInstruction is a floating-point operation. assert(isFPMathOp() && "this op can't take fast-math flags"); } @@ -580,7 +580,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { auto &CI = *cast<CallInst>(getUnderlyingInstr()); assert(!isa<DbgInfoIntrinsic>(CI) && "DbgInfoIntrinsic should have been dropped during VPlan construction"); - State.setDebugLocFrom(CI.getDebugLoc()); + State.setDebugLocFrom(getDebugLoc()); bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic; FunctionType *VFTy = nullptr; @@ -1712,16 +1712,20 @@ void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPReductionPHIRecipe::execute(VPTransformState &State) { - PHINode *PN = cast<PHINode>(getUnderlyingValue()); auto &Builder = State.Builder; + // Reductions do not have to start at zero. They can start with + // any loop invariant values. + VPValue *StartVPV = getStartValue(); + Value *StartV = StartVPV->getLiveInIRValue(); + // In order to support recurrences we need to be able to vectorize Phi nodes. // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. bool ScalarPHI = State.VF.isScalar() || IsInLoop; - Type *VecTy = - ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF); + Type *VecTy = ScalarPHI ? StartV->getType() + : VectorType::get(StartV->getType(), State.VF); BasicBlock *HeaderBB = State.CFG.PrevBB; assert(State.CurrentVectorLoop->getHeader() == HeaderBB && @@ -1735,11 +1739,6 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - // Reductions do not have to start at zero. They can start with - // any loop invariant values. - VPValue *StartVPV = getStartValue(); - Value *StartV = StartVPV->getLiveInIRValue(); - Value *Iden = nullptr; RecurKind RK = RdxDesc.getRecurrenceKind(); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 5c430620a2dc..8e6b48cdb2c8 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -74,9 +74,9 @@ void VPlanTransforms::VPInstructionsToVPRecipes( } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { - NewRecipe = - new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()), - getVectorIntrinsicIDForCall(CI, &TLI)); + NewRecipe = new VPWidenCallRecipe( + *CI, drop_end(Ingredient.operands()), + getVectorIntrinsicIDForCall(CI, &TLI), CI->getDebugLoc()); } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands()); } else if (auto *CI = dyn_cast<CastInst>(Inst)) { @@ -103,7 +103,7 @@ static bool sinkScalarOperands(VPlan &Plan) { bool Changed = false; // First, collect the operands of all recipes in replicate blocks as seeds for // sinking. - SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList; + SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList; for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) { VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock(); if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2) @@ -113,7 +113,8 @@ static bool sinkScalarOperands(VPlan &Plan) { continue; for (auto &Recipe : *VPBB) { for (VPValue *Op : Recipe.operands()) - if (auto *Def = Op->getDefiningRecipe()) + if (auto *Def = + dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe())) WorkList.insert(std::make_pair(VPBB, Def)); } } @@ -122,7 +123,7 @@ static bool sinkScalarOperands(VPlan &Plan) { // Try to sink each replicate or scalar IV steps recipe in the worklist. for (unsigned I = 0; I != WorkList.size(); ++I) { VPBasicBlock *SinkTo; - VPRecipeBase *SinkCandidate; + VPSingleDefRecipe *SinkCandidate; std::tie(SinkTo, SinkCandidate) = WorkList[I]; if (SinkCandidate->getParent() == SinkTo || SinkCandidate->mayHaveSideEffects() || @@ -146,12 +147,11 @@ static bool sinkScalarOperands(VPlan &Plan) { return false; if (UI->getParent() == SinkTo) return true; - NeedsDuplicating = - UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue()); + NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate); // We only know how to duplicate VPRecipeRecipes for now. return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate); }; - if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser)) + if (!all_of(SinkCandidate->users(), CanSinkWithUser)) continue; if (NeedsDuplicating) { @@ -163,14 +163,14 @@ static bool sinkScalarOperands(VPlan &Plan) { // TODO: add ".cloned" suffix to name of Clone's VPValue. Clone->insertBefore(SinkCandidate); - SinkCandidate->getVPSingleValue()->replaceUsesWithIf( - Clone, [SinkTo](VPUser &U, unsigned) { - return cast<VPRecipeBase>(&U)->getParent() != SinkTo; - }); + SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) { + return cast<VPRecipeBase>(&U)->getParent() != SinkTo; + }); } SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi()); for (VPValue *Op : SinkCandidate->operands()) - if (auto *Def = Op->getDefiningRecipe()) + if (auto *Def = + dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe())) WorkList.insert(std::make_pair(SinkTo, Def)); Changed = true; } @@ -412,16 +412,15 @@ void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { auto &Casts = IV->getInductionDescriptor().getCastInsts(); VPValue *FindMyCast = IV; for (Instruction *IRCast : reverse(Casts)) { - VPRecipeBase *FoundUserCast = nullptr; + VPSingleDefRecipe *FoundUserCast = nullptr; for (auto *U : FindMyCast->users()) { - auto *UserCast = cast<VPRecipeBase>(U); - if (UserCast->getNumDefinedValues() == 1 && - UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) { + auto *UserCast = dyn_cast<VPSingleDefRecipe>(U); + if (UserCast && UserCast->getUnderlyingValue() == IRCast) { FoundUserCast = UserCast; break; } } - FindMyCast = FoundUserCast->getVPSingleValue(); + FindMyCast = FoundUserCast; } FindMyCast->replaceAllUsesWith(IV); } @@ -895,7 +894,10 @@ void VPlanTransforms::truncateToMinimalBitwidths( vp_depth_first_deep(Plan.getVectorLoopRegion()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe, - VPWidenSelectRecipe>(&R)) + VPWidenSelectRecipe, VPWidenMemoryInstructionRecipe>(&R)) + continue; + if (isa<VPWidenMemoryInstructionRecipe>(&R) && + cast<VPWidenMemoryInstructionRecipe>(&R)->isStore()) continue; VPValue *ResultVPV = R.getVPSingleValue(); @@ -948,6 +950,23 @@ void VPlanTransforms::truncateToMinimalBitwidths( auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits); + // Any wrapping introduced by shrinking this operation shouldn't be + // considered undefined behavior. So, we can't unconditionally copy + // arithmetic wrapping flags to VPW. + if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R)) + VPW->dropPoisonGeneratingFlags(); + + // Extend result to original width. + auto *Ext = new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy); + Ext->insertAfter(&R); + ResultVPV->replaceAllUsesWith(Ext); + Ext->setOperand(0, ResultVPV); + + if (isa<VPWidenMemoryInstructionRecipe>(&R)) { + assert(!cast<VPWidenMemoryInstructionRecipe>(&R)->isStore() && "stores cannot be narrowed"); + continue; + } + // Shrink operands by introducing truncates as needed. unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0; for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) { @@ -979,17 +998,6 @@ void VPlanTransforms::truncateToMinimalBitwidths( } } - // Any wrapping introduced by shrinking this operation shouldn't be - // considered undefined behavior. So, we can't unconditionally copy - // arithmetic wrapping flags to VPW. - if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R)) - VPW->dropPoisonGeneratingFlags(); - - // Extend result to original width. - auto *Ext = new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy); - Ext->insertAfter(&R); - ResultVPV->replaceAllUsesWith(Ext); - Ext->setOperand(0, ResultVPV); } } @@ -1130,7 +1138,7 @@ void VPlanTransforms::addActiveLaneMask( "Must have widened canonical IV when tail folding!"); auto *WideCanonicalIV = cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser); - VPRecipeBase *LaneMask; + VPSingleDefRecipe *LaneMask; if (UseActiveLaneMaskForControlFlow) { LaneMask = addVPLaneMaskPhiAndUpdateExitBranch( Plan, DataAndControlFlowWithoutRuntimeCheck); @@ -1155,7 +1163,7 @@ void VPlanTransforms::addActiveLaneMask( assert(CompareToReplace->getOperand(0) == WideCanonicalIV && "WidenCanonicalIV must be the first operand of the compare"); - CompareToReplace->replaceAllUsesWith(LaneMask->getVPSingleValue()); + CompareToReplace->replaceAllUsesWith(LaneMask); CompareToReplace->eraseFromParent(); } } |