Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h  |  11
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp           |  69
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp           |  43
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp                   |  21
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h                     | 193
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp        |   2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp            |  21
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp         |  74
8 files changed, 268 insertions(+), 166 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index cff72ae263d8..a7ebf78e54ce 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -148,26 +148,27 @@ public:
VPInstruction *createOverflowingOp(unsigned Opcode,
std::initializer_list<VPValue *> Operands,
VPRecipeWithIRFlags::WrapFlagsTy WrapFlags,
- DebugLoc DL, const Twine &Name = "") {
+ DebugLoc DL = {}, const Twine &Name = "") {
return tryInsertInstruction(
new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
}
- VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") {
+ VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
+ const Twine &Name = "") {
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
}
- VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL,
+ VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
}
- VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL,
+ VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name);
}
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
- DebugLoc DL, const Twine &Name = "",
+ DebugLoc DL = {}, const Twine &Name = "",
std::optional<FastMathFlags> FMFs = std::nullopt) {
auto *Select =
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9743fa0e7402..6ca93e15719f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2346,9 +2346,8 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
auto *Offset = CreateMul(Index, Step);
return CreateAdd(StartValue, Offset);
}
- case InductionDescriptor::IK_PtrInduction: {
- return B.CreateGEP(B.getInt8Ty(), StartValue, CreateMul(Index, Step));
- }
+ case InductionDescriptor::IK_PtrInduction:
+ return B.CreatePtrAdd(StartValue, CreateMul(Index, Step));
case InductionDescriptor::IK_FpInduction: {
assert(!isa<VectorType>(Index->getType()) &&
"Vector indices not supported for FP inductions yet");
@@ -6950,10 +6949,25 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());
- return TTI.getArithmeticInstrCost(
+ auto InstrCost = TTI.getArithmeticInstrCost(
I->getOpcode(), VectorTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
Op2Info, Operands, I);
+
+ // Some targets can replace frem with vector library calls.
+ InstructionCost VecCallCost = InstructionCost::getInvalid();
+ if (I->getOpcode() == Instruction::FRem) {
+ LibFunc Func;
+ if (TLI->getLibFunc(I->getOpcode(), I->getType(), Func) &&
+ TLI->isFunctionVectorizable(TLI->getName(Func), VF)) {
+ SmallVector<Type *, 4> OpTypes;
+ for (auto &Op : I->operands())
+ OpTypes.push_back(Op->getType());
+ VecCallCost =
+ TTI.getCallInstrCost(nullptr, VectorTy, OpTypes, CostKind);
+ }
+ }
+ return std::min(InstrCost, VecCallCost);
}
case Instruction::FNeg: {
return TTI.getArithmeticInstrCost(
@@ -8247,7 +8261,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
},
Range);
if (ShouldUseVectorIntrinsic)
- return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID);
+ return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID,
+ CI->getDebugLoc());
Function *Variant = nullptr;
std::optional<unsigned> MaskPos;
@@ -8300,7 +8315,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
}
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()),
- Intrinsic::not_intrinsic, Variant);
+ Intrinsic::not_intrinsic, CI->getDebugLoc(),
+ Variant);
}
return nullptr;
@@ -8949,16 +8965,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
"AnyOf reductions are not allowed for in-loop reductions");
// Collect the chain of "link" recipes for the reduction starting at PhiR.
- SetVector<VPRecipeBase *> Worklist;
+ SetVector<VPSingleDefRecipe *> Worklist;
Worklist.insert(PhiR);
for (unsigned I = 0; I != Worklist.size(); ++I) {
- VPRecipeBase *Cur = Worklist[I];
- for (VPUser *U : Cur->getVPSingleValue()->users()) {
- auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
- if (!UserRecipe)
+ VPSingleDefRecipe *Cur = Worklist[I];
+ for (VPUser *U : Cur->users()) {
+ auto *UserRecipe = dyn_cast<VPSingleDefRecipe>(U);
+ if (!UserRecipe) {
+ assert(isa<VPLiveOut>(U) &&
+ "U must either be a VPSingleDef or VPLiveOut");
continue;
- assert(UserRecipe->getNumDefinedValues() == 1 &&
- "recipes must define exactly one result value");
+ }
Worklist.insert(UserRecipe);
}
}
@@ -8968,10 +8985,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// (PreviousLink) to tell which of the two operands of a Link will remain
// scalar and which will be reduced. For minmax by select(cmp), Link will be
// the select instructions.
- VPRecipeBase *PreviousLink = PhiR; // Aka Worklist[0].
- for (VPRecipeBase *CurrentLink : Worklist.getArrayRef().drop_front()) {
- VPValue *PreviousLinkV = PreviousLink->getVPSingleValue();
-
+ VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
+ for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) {
Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
// Index of the first operand which holds a non-mask vector operand.
@@ -8986,7 +9001,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
"Expected instruction to be a call to the llvm.fmuladd intrinsic");
assert(((MinVF.isScalar() && isa<VPReplicateRecipe>(CurrentLink)) ||
isa<VPWidenCallRecipe>(CurrentLink)) &&
- CurrentLink->getOperand(2) == PreviousLinkV &&
+ CurrentLink->getOperand(2) == PreviousLink &&
"expected a call where the previous link is the added operand");
// If the instruction is a call to the llvm.fmuladd intrinsic then we
@@ -9017,15 +9032,15 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// Note that for non-commutable operands (cmp-selects), the semantics of
// the cmp-select are captured in the recurrence kind.
unsigned VecOpId =
- CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLinkV
+ CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
? IndexOfFirstOperand + 1
: IndexOfFirstOperand;
VecOp = CurrentLink->getOperand(VecOpId);
- assert(VecOp != PreviousLinkV &&
+ assert(VecOp != PreviousLink &&
CurrentLink->getOperand(CurrentLink->getNumOperands() - 1 -
(VecOpId - IndexOfFirstOperand)) ==
- PreviousLinkV &&
- "PreviousLinkV must be the operand other than VecOp");
+ PreviousLink &&
+ "PreviousLink must be the operand other than VecOp");
}
BasicBlock *BB = CurrentLinkI->getParent();
@@ -9037,19 +9052,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
}
VPReductionRecipe *RedRecipe = new VPReductionRecipe(
- RdxDesc, CurrentLinkI, PreviousLinkV, VecOp, CondOp);
+ RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp);
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
// Note that this transformation may leave over dead recipes (including
// CurrentLink), which will be cleaned by a later VPlan transform.
LinkVPBB->appendRecipe(RedRecipe);
- CurrentLink->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
+ CurrentLink->replaceAllUsesWith(RedRecipe);
PreviousLink = RedRecipe;
}
}
- Builder.setInsertPoint(&*LatchVPBB->begin());
- for (VPRecipeBase &R :
- Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
+ Builder.setInsertPoint(&*LatchVPBB->begin());
+ for (VPRecipeBase &R :
+ Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
if (!PhiR)
continue;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 055fbb00871f..601d2454c1e1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7379,6 +7379,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
continue;
if (Idx >= static_cast<int>(CommonVF))
Idx = E1Mask[Idx - CommonVF] + VF;
+ else
+ Idx = E1Mask[Idx];
}
CommonVF = VF;
}
@@ -12986,8 +12988,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
for (ScheduleData *BundleMember = Picked; BundleMember;
BundleMember = BundleMember->NextInBundle) {
Instruction *PickedInst = BundleMember->Inst;
- if (PickedInst->getNextNode() != LastScheduledInst)
- PickedInst->moveBefore(LastScheduledInst);
+ if (PickedInst->getNextNonDebugInstruction() != LastScheduledInst)
+ PickedInst->moveAfter(LastScheduledInst->getPrevNode());
LastScheduledInst = PickedInst;
}
@@ -13181,7 +13183,7 @@ void BoUpSLP::computeMinimumValueSizes() {
// We only attempt to truncate integer expressions.
auto &TreeRoot = VectorizableTree[0]->Scalars;
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
- if (!TreeRootIT)
+ if (!TreeRootIT || VectorizableTree.front()->State == TreeEntry::NeedToGather)
return;
// Ensure the roots of the vectorizable tree don't form a cycle.
@@ -14792,8 +14794,17 @@ public:
LocalExternallyUsedValues[RdxVal];
// Update LocalExternallyUsedValues for the scalar, replaced by
// extractelement instructions.
+ DenseMap<Value *, Value *> ReplacementToExternal;
+ for (const std::pair<Value *, Value *> &Pair : ReplacedExternals)
+ ReplacementToExternal.try_emplace(Pair.second, Pair.first);
for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) {
- auto *It = ExternallyUsedValues.find(Pair.first);
+ Value *Ext = Pair.first;
+ auto RIt = ReplacementToExternal.find(Ext);
+ while (RIt != ReplacementToExternal.end()) {
+ Ext = RIt->second;
+ RIt = ReplacementToExternal.find(Ext);
+ }
+ auto *It = ExternallyUsedValues.find(Ext);
if (It == ExternallyUsedValues.end())
continue;
LocalExternallyUsedValues[Pair.second].append(It->second);
@@ -15214,6 +15225,19 @@ private:
assert(IsSupportedHorRdxIdentityOp &&
"The optimization of matched scalar identity horizontal reductions "
"must be supported.");
+ auto *VTy = cast<FixedVectorType>(VectorizedValue->getType());
+ if (VTy->getElementType() != VL.front()->getType()) {
+ VectorizedValue = Builder.CreateIntCast(
+ VectorizedValue,
+ FixedVectorType::get(VL.front()->getType(), VTy->getNumElements()),
+ any_of(VL, [&](Value *R) {
+ KnownBits Known = computeKnownBits(
+ R, cast<Instruction>(ReductionOps.front().front())
+ ->getModule()
+ ->getDataLayout());
+ return !Known.isNonNegative();
+ }));
+ }
switch (RdxKind) {
case RecurKind::Add: {
// root = mul prev_root, <1, 1, n, 1>
@@ -16217,10 +16241,13 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
// Some of the candidates may have already been vectorized after we
- // initially collected them. If so, they are marked as deleted, so remove
- // them from the set of candidates.
- Candidates.remove_if(
- [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });
+ // initially collected them or their index is optimized to constant value.
+ // If so, they are marked as deleted, so remove them from the set of
+ // candidates.
+ Candidates.remove_if([&R](Value *I) {
+ return R.isDeleted(cast<Instruction>(I)) ||
+ isa<Constant>(cast<GetElementPtrInst>(I)->idx_begin()->get());
+ });
// Remove from the set of candidates all pairs of getelementptrs with
// constant differences. Such getelementptrs are likely not good
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
index b6e56c47c227..3eeb1a6948f2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1136,29 +1136,18 @@ void VPlanIngredient::print(raw_ostream &O) const {
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
void VPValue::replaceAllUsesWith(VPValue *New) {
- if (this == New)
- return;
- for (unsigned J = 0; J < getNumUsers();) {
- VPUser *User = Users[J];
- bool RemovedUser = false;
- for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
- if (User->getOperand(I) == this) {
- User->setOperand(I, New);
- RemovedUser = true;
- }
- // If a user got removed after updating the current user, the next user to
- // update will be moved to the current position, so we only need to
- // increment the index if the number of users did not change.
- if (!RemovedUser)
- J++;
- }
+ replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
}
void VPValue::replaceUsesWithIf(
VPValue *New,
llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
+ // Note that this early exit is required for correctness; the implementation
+ // below relies on the number of users for this VPValue to decrease, which
+ // isn't the case if this == New.
if (this == New)
return;
+
for (unsigned J = 0; J < getNumUsers();) {
VPUser *User = Users[J];
bool RemovedUser = false;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
index 4b4f4911eb64..0c6214868d84 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -12,10 +12,12 @@
/// VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
/// within VPBasicBlocks;
-/// 3. VPInstruction, a concrete Recipe and VPUser modeling a single planned
+/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
+/// also inherit from VPValue.
+/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
/// instruction;
-/// 4. The VPlan class holding a candidate for vectorization;
-/// 5. The VPlanPrinter class providing a way to print a plan in dot format;
+/// 5. The VPlan class holding a candidate for vectorization;
+/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//
@@ -700,8 +702,8 @@ public:
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
-/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
-/// VPRecipeBase before VPValue.
+/// recipes must inherit from VPSingleDef instead of inheriting from both
+/// VPRecipeBase and VPValue separately.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
public VPDef,
public VPUser {
@@ -762,15 +764,6 @@ public:
/// \returns an iterator pointing to the element after the erased one
iplist<VPRecipeBase>::iterator eraseFromParent();
- /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
- /// otherwise.
- Instruction *getUnderlyingInstr() {
- return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
- }
- const Instruction *getUnderlyingInstr() const {
- return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
- }
-
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPDef *D) {
// All VPDefs are also VPRecipeBases.
@@ -819,10 +812,80 @@ public:
} \
static inline bool classof(const VPRecipeBase *R) { \
return R->getVPDefID() == VPDefID; \
+ } \
+ static inline bool classof(const VPSingleDefRecipe *R) { \
+ return R->getVPDefID() == VPDefID; \
}
+/// VPSingleDef is a base class for recipes for modeling a sequence of one or
+/// more output IR that define a single result VPValue.
+/// Note that VPRecipeBase must be inherited from before VPValue.
+class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
+public:
+ template <typename IterT>
+ VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
+ : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
+
+ VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
+ DebugLoc DL = {})
+ : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
+
+ template <typename IterT>
+ VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
+ DebugLoc DL = {})
+ : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
+
+ static inline bool classof(const VPRecipeBase *R) {
+ switch (R->getVPDefID()) {
+ case VPRecipeBase::VPDerivedIVSC:
+ case VPRecipeBase::VPExpandSCEVSC:
+ case VPRecipeBase::VPInstructionSC:
+ case VPRecipeBase::VPReductionSC:
+ case VPRecipeBase::VPReplicateSC:
+ case VPRecipeBase::VPScalarIVStepsSC:
+ case VPRecipeBase::VPVectorPointerSC:
+ case VPRecipeBase::VPWidenCallSC:
+ case VPRecipeBase::VPWidenCanonicalIVSC:
+ case VPRecipeBase::VPWidenCastSC:
+ case VPRecipeBase::VPWidenGEPSC:
+ case VPRecipeBase::VPWidenSC:
+ case VPRecipeBase::VPWidenSelectSC:
+ case VPRecipeBase::VPBlendSC:
+ case VPRecipeBase::VPPredInstPHISC:
+ case VPRecipeBase::VPCanonicalIVPHISC:
+ case VPRecipeBase::VPActiveLaneMaskPHISC:
+ case VPRecipeBase::VPFirstOrderRecurrencePHISC:
+ case VPRecipeBase::VPWidenPHISC:
+ case VPRecipeBase::VPWidenIntOrFpInductionSC:
+ case VPRecipeBase::VPWidenPointerInductionSC:
+ case VPRecipeBase::VPReductionPHISC:
+ return true;
+ case VPRecipeBase::VPInterleaveSC:
+ case VPRecipeBase::VPBranchOnMaskSC:
+ case VPRecipeBase::VPWidenMemoryInstructionSC:
+ // TODO: Widened stores don't define a value, but widened loads do. Split
+ // the recipes to be able to make widened loads VPSingleDefRecipes.
+ return false;
+ }
+ llvm_unreachable("Unhandled VPDefID");
+ }
+
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
+
+ /// Returns the underlying instruction.
+ Instruction *getUnderlyingInstr() {
+ return cast<Instruction>(getUnderlyingValue());
+ }
+ const Instruction *getUnderlyingInstr() const {
+ return cast<Instruction>(getUnderlyingValue());
+ }
+};
+
/// Class to record LLVM IR flag for a recipe along with it.
-class VPRecipeWithIRFlags : public VPRecipeBase {
+class VPRecipeWithIRFlags : public VPSingleDefRecipe {
enum class OperationType : unsigned char {
Cmp,
OverflowingBinOp,
@@ -886,14 +949,14 @@ private:
public:
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
- : VPRecipeBase(SC, Operands, DL) {
+ : VPSingleDefRecipe(SC, Operands, DL) {
OpType = OperationType::Other;
AllFlags = 0;
}
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
- : VPRecipeWithIRFlags(SC, Operands, I.getDebugLoc()) {
+ : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) {
if (auto *Op = dyn_cast<CmpInst>(&I)) {
OpType = OperationType::Cmp;
CmpPredicate = Op->getPredicate();
@@ -915,32 +978,35 @@ public:
} else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
OpType = OperationType::FPMathOp;
FMFs = Op->getFastMathFlags();
+ } else {
+ OpType = OperationType::Other;
+ AllFlags = 0;
}
}
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
CmpInst::Predicate Pred, DebugLoc DL = {})
- : VPRecipeBase(SC, Operands, DL), OpType(OperationType::Cmp),
+ : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
CmpPredicate(Pred) {}
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
WrapFlagsTy WrapFlags, DebugLoc DL = {})
- : VPRecipeBase(SC, Operands, DL), OpType(OperationType::OverflowingBinOp),
- WrapFlags(WrapFlags) {}
+ : VPSingleDefRecipe(SC, Operands, DL),
+ OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
FastMathFlags FMFs, DebugLoc DL = {})
- : VPRecipeBase(SC, Operands, DL), OpType(OperationType::FPMathOp),
+ : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
FMFs(FMFs) {}
protected:
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
GEPFlagsTy GEPFlags, DebugLoc DL = {})
- : VPRecipeBase(SC, Operands, DL), OpType(OperationType::GEPOp),
+ : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
GEPFlags(GEPFlags) {}
public:
@@ -1056,7 +1122,7 @@ public:
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
-class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
+class VPInstruction : public VPRecipeWithIRFlags {
friend class VPlanSlp;
public:
@@ -1103,7 +1169,7 @@ public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
const Twine &Name = "")
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
- VPValue(this), Opcode(Opcode), Name(Name.str()) {}
+ Opcode(Opcode), Name(Name.str()) {}
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
DebugLoc DL = {}, const Twine &Name = "")
@@ -1115,7 +1181,7 @@ public:
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
- VPValue(this), Opcode(Opcode), Name(Name.str()) {}
+ Opcode(Opcode), Name(Name.str()) {}
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
@@ -1205,13 +1271,13 @@ public:
/// VPWidenRecipe is a recipe for producing a copy of vector type its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
-class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue {
+class VPWidenRecipe : public VPRecipeWithIRFlags {
unsigned Opcode;
public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
- : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPValue(this, &I),
+ : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
Opcode(I.getOpcode()) {}
~VPWidenRecipe() override = default;
@@ -1231,7 +1297,7 @@ public:
};
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
-class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue {
+class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Cast instruction opcode.
Instruction::CastOps Opcode;
@@ -1241,8 +1307,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue {
public:
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
CastInst &UI)
- : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPValue(this, &UI),
- Opcode(Opcode), ResultTy(ResultTy) {
+ : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
+ ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
assert(UI.getType() == ResultTy &&
@@ -1250,8 +1316,8 @@ public:
}
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
- : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), VPValue(this, nullptr),
- Opcode(Opcode), ResultTy(ResultTy) {}
+ : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
+ ResultTy(ResultTy) {}
~VPWidenCastRecipe() override = default;
@@ -1273,7 +1339,7 @@ public:
};
/// A recipe for widening Call instructions.
-class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
+class VPWidenCallRecipe : public VPSingleDefRecipe {
/// ID of the vector intrinsic to call when widening the call. If set the
/// Intrinsic::not_intrinsic, a library call will be used instead.
Intrinsic::ID VectorIntrinsicID;
@@ -1286,9 +1352,9 @@ class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
public:
template <typename IterT>
VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments,
- Intrinsic::ID VectorIntrinsicID,
+ Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
Function *Variant = nullptr)
- : VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I),
+ : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, &I, DL),
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {}
~VPWidenCallRecipe() override = default;
@@ -1306,11 +1372,11 @@ public:
};
/// A recipe for widening select instructions.
-struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
+struct VPWidenSelectRecipe : public VPSingleDefRecipe {
template <typename IterT>
VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
- : VPRecipeBase(VPDef::VPWidenSelectSC, Operands, I.getDebugLoc()),
- VPValue(this, &I) {}
+ : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
+ I.getDebugLoc()) {}
~VPWidenSelectRecipe() override = default;
@@ -1335,7 +1401,7 @@ struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
};
/// A recipe for handling GEP instructions.
-class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue {
+class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
bool isPointerLoopInvariant() const {
return getOperand(0)->isDefinedOutsideVectorRegions();
}
@@ -1353,8 +1419,7 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue {
public:
template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
- : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
- VPValue(this, GEP) {}
+ : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
~VPWidenGEPRecipe() override = default;
@@ -1373,7 +1438,7 @@ public:
/// A recipe to compute the pointers for widened memory accesses of IndexTy for
/// all parts. If IsReverse is true, compute pointers for accessing the input in
/// reverse order per part.
-class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPValue {
+class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
Type *IndexedTy;
bool IsReverse;
@@ -1382,7 +1447,7 @@ public:
bool IsInBounds, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlagsTy(IsInBounds), DL),
- VPValue(this), IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+ IndexedTy(IndexedTy), IsReverse(IsReverse) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
@@ -1424,11 +1489,11 @@ public:
/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
/// pointer induction. Produces either a vector PHI per-part or scalar values
/// per-lane based on the canonical induction.
-class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue {
+class VPHeaderPHIRecipe : public VPSingleDefRecipe {
protected:
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
VPValue *Start = nullptr, DebugLoc DL = {})
- : VPRecipeBase(VPDefID, {}, DL), VPValue(this, UnderlyingInstr) {
+ : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
if (Start)
addOperand(Start);
}
@@ -1709,14 +1774,13 @@ public:
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
-class VPBlendRecipe : public VPRecipeBase, public VPValue {
+class VPBlendRecipe : public VPSingleDefRecipe {
public:
/// The blend operation is a User of the incoming values and of their
/// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
/// might be incoming with a full mask for which there is no VPValue.
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
- : VPRecipeBase(VPDef::VPBlendSC, Operands, Phi->getDebugLoc()),
- VPValue(this, Phi) {
+ : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
assert(Operands.size() > 0 &&
((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
"Expected either a single incoming value or a positive even number "
@@ -1843,14 +1907,15 @@ public:
/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
-class VPReductionRecipe : public VPRecipeBase, public VPValue {
+class VPReductionRecipe : public VPSingleDefRecipe {
/// The recurrence decriptor for the reduction in question.
const RecurrenceDescriptor &RdxDesc;
public:
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp)
- : VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I),
+ : VPSingleDefRecipe(VPDef::VPReductionSC,
+ ArrayRef<VPValue *>({ChainOp, VecOp}), I),
RdxDesc(R) {
if (CondOp)
addOperand(CondOp);
@@ -1883,7 +1948,7 @@ public:
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
-class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue {
+class VPReplicateRecipe : public VPRecipeWithIRFlags {
/// Indicator if only a single replica per lane is needed.
bool IsUniform;
@@ -1895,7 +1960,7 @@ public:
VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
bool IsUniform, VPValue *Mask = nullptr)
: VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
- VPValue(this, I), IsUniform(IsUniform), IsPredicated(Mask) {
+ IsUniform(IsUniform), IsPredicated(Mask) {
if (Mask)
addOperand(Mask);
}
@@ -1993,12 +2058,12 @@ public:
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
-class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue {
+class VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
/// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
/// nodes after merging back from a Branch-on-Mask.
VPPredInstPHIRecipe(VPValue *PredV)
- : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {}
+ : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
~VPPredInstPHIRecipe() override = default;
VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
@@ -2119,14 +2184,13 @@ public:
};
/// Recipe to expand a SCEV expression.
-class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue {
+class VPExpandSCEVRecipe : public VPSingleDefRecipe {
const SCEV *Expr;
ScalarEvolution &SE;
public:
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
- : VPRecipeBase(VPDef::VPExpandSCEVSC, {}), VPValue(this), Expr(Expr),
- SE(SE) {}
+ : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
~VPExpandSCEVRecipe() override = default;
@@ -2225,11 +2289,10 @@ public:
};
/// A Recipe for widening the canonical induction variable of the vector loop.
-class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
+class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
public:
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
- : VPRecipeBase(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}),
- VPValue(this) {}
+ : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
~VPWidenCanonicalIVRecipe() override = default;
@@ -2256,7 +2319,7 @@ public:
/// A recipe for converting the canonical IV value to the corresponding value of
/// an IV with different start and step values, using Start + CanonicalIV *
/// Step.
-class VPDerivedIVRecipe : public VPRecipeBase, public VPValue {
+class VPDerivedIVRecipe : public VPSingleDefRecipe {
/// If not nullptr, the result of the induction will get truncated to
/// TruncResultTy.
Type *TruncResultTy;
@@ -2271,8 +2334,8 @@ public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
Type *TruncResultTy)
- : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
- VPValue(this), TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
+ : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
+ TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
}
@@ -2309,7 +2372,7 @@ public:
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
-class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, public VPValue {
+class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
Instruction::BinaryOps InductionOpcode;
public:
@@ -2317,7 +2380,7 @@ public:
Instruction::BinaryOps Opcode, FastMathFlags FMFs)
: VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
ArrayRef<VPValue *>({IV, Step}), FMFs),
- VPValue(this), InductionOpcode(Opcode) {}
+ InductionOpcode(Opcode) {}
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
VPValue *Step)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index f950d4740e41..94456bf858d9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -283,7 +283,7 @@ VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
BasicBlock *BB) {
VPIRBuilder.setInsertPoint(VPBB);
- for (Instruction &InstRef : *BB) {
+ for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
Instruction *Inst = &InstRef;
// There shouldn't be any VPValue for Inst at this point. Otherwise, we
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1f844bce2310..bbeb5da2cfec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -258,7 +258,7 @@ VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
const Twine &Name)
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
Pred, DL),
- VPValue(this), Opcode(Opcode), Name(Name.str()) {
+ Opcode(Opcode), Name(Name.str()) {
assert(Opcode == Instruction::ICmp &&
"only ICmp predicates supported at the moment");
}
@@ -267,7 +267,7 @@ VPInstruction::VPInstruction(unsigned Opcode,
std::initializer_list<VPValue *> Operands,
FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
- VPValue(this), Opcode(Opcode), Name(Name.str()) {
+ Opcode(Opcode), Name(Name.str()) {
// Make sure the VPInstruction is a floating-point operation.
assert(isFPMathOp() && "this op can't take fast-math flags");
}
@@ -580,7 +580,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
auto &CI = *cast<CallInst>(getUnderlyingInstr());
assert(!isa<DbgInfoIntrinsic>(CI) &&
"DbgInfoIntrinsic should have been dropped during VPlan construction");
- State.setDebugLocFrom(CI.getDebugLoc());
+ State.setDebugLocFrom(getDebugLoc());
bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
FunctionType *VFTy = nullptr;
@@ -1712,16 +1712,20 @@ void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPReductionPHIRecipe::execute(VPTransformState &State) {
- PHINode *PN = cast<PHINode>(getUnderlyingValue());
auto &Builder = State.Builder;
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
+ VPValue *StartVPV = getStartValue();
+ Value *StartV = StartVPV->getLiveInIRValue();
+
// In order to support recurrences we need to be able to vectorize Phi nodes.
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
// this value when we vectorize all of the instructions that use the PHI.
bool ScalarPHI = State.VF.isScalar() || IsInLoop;
- Type *VecTy =
- ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
+ Type *VecTy = ScalarPHI ? StartV->getType()
+ : VectorType::get(StartV->getType(), State.VF);
BasicBlock *HeaderBB = State.CFG.PrevBB;
assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
@@ -1735,11 +1739,6 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
- // Reductions do not have to start at zero. They can start with
- // any loop invariant values.
- VPValue *StartVPV = getStartValue();
- Value *StartV = StartVPV->getLiveInIRValue();
-
Value *Iden = nullptr;
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 5c430620a2dc..8e6b48cdb2c8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -74,9 +74,9 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
- NewRecipe =
- new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()),
- getVectorIntrinsicIDForCall(CI, &TLI));
+ NewRecipe = new VPWidenCallRecipe(
+ *CI, drop_end(Ingredient.operands()),
+ getVectorIntrinsicIDForCall(CI, &TLI), CI->getDebugLoc());
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
} else if (auto *CI = dyn_cast<CastInst>(Inst)) {
@@ -103,7 +103,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
bool Changed = false;
// First, collect the operands of all recipes in replicate blocks as seeds for
// sinking.
- SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList;
+ SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
@@ -113,7 +113,8 @@ static bool sinkScalarOperands(VPlan &Plan) {
continue;
for (auto &Recipe : *VPBB) {
for (VPValue *Op : Recipe.operands())
- if (auto *Def = Op->getDefiningRecipe())
+ if (auto *Def =
+ dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
WorkList.insert(std::make_pair(VPBB, Def));
}
}
@@ -122,7 +123,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
// Try to sink each replicate or scalar IV steps recipe in the worklist.
for (unsigned I = 0; I != WorkList.size(); ++I) {
VPBasicBlock *SinkTo;
- VPRecipeBase *SinkCandidate;
+ VPSingleDefRecipe *SinkCandidate;
std::tie(SinkTo, SinkCandidate) = WorkList[I];
if (SinkCandidate->getParent() == SinkTo ||
SinkCandidate->mayHaveSideEffects() ||
@@ -146,12 +147,11 @@ static bool sinkScalarOperands(VPlan &Plan) {
return false;
if (UI->getParent() == SinkTo)
return true;
- NeedsDuplicating =
- UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue());
+ NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
// We only know how to duplicate VPRecipeRecipes for now.
return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
};
- if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser))
+ if (!all_of(SinkCandidate->users(), CanSinkWithUser))
continue;
if (NeedsDuplicating) {
@@ -163,14 +163,14 @@ static bool sinkScalarOperands(VPlan &Plan) {
// TODO: add ".cloned" suffix to name of Clone's VPValue.
Clone->insertBefore(SinkCandidate);
- SinkCandidate->getVPSingleValue()->replaceUsesWithIf(
- Clone, [SinkTo](VPUser &U, unsigned) {
- return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
- });
+ SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) {
+ return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
+ });
}
SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
for (VPValue *Op : SinkCandidate->operands())
- if (auto *Def = Op->getDefiningRecipe())
+ if (auto *Def =
+ dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
WorkList.insert(std::make_pair(SinkTo, Def));
Changed = true;
}
@@ -412,16 +412,15 @@ void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) {
auto &Casts = IV->getInductionDescriptor().getCastInsts();
VPValue *FindMyCast = IV;
for (Instruction *IRCast : reverse(Casts)) {
- VPRecipeBase *FoundUserCast = nullptr;
+ VPSingleDefRecipe *FoundUserCast = nullptr;
for (auto *U : FindMyCast->users()) {
- auto *UserCast = cast<VPRecipeBase>(U);
- if (UserCast->getNumDefinedValues() == 1 &&
- UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) {
+ auto *UserCast = dyn_cast<VPSingleDefRecipe>(U);
+ if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
FoundUserCast = UserCast;
break;
}
}
- FindMyCast = FoundUserCast->getVPSingleValue();
+ FindMyCast = FoundUserCast;
}
FindMyCast->replaceAllUsesWith(IV);
}
@@ -895,7 +894,10 @@ void VPlanTransforms::truncateToMinimalBitwidths(
vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
- VPWidenSelectRecipe>(&R))
+ VPWidenSelectRecipe, VPWidenMemoryInstructionRecipe>(&R))
+ continue;
+ if (isa<VPWidenMemoryInstructionRecipe>(&R) &&
+ cast<VPWidenMemoryInstructionRecipe>(&R)->isStore())
continue;
VPValue *ResultVPV = R.getVPSingleValue();
@@ -948,6 +950,23 @@ void VPlanTransforms::truncateToMinimalBitwidths(
auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits);
+ // Any wrapping introduced by shrinking this operation shouldn't be
+ // considered undefined behavior. So, we can't unconditionally copy
+ // arithmetic wrapping flags to VPW.
+ if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
+ VPW->dropPoisonGeneratingFlags();
+
+ // Extend result to original width.
+ auto *Ext = new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
+ Ext->insertAfter(&R);
+ ResultVPV->replaceAllUsesWith(Ext);
+ Ext->setOperand(0, ResultVPV);
+
+ if (isa<VPWidenMemoryInstructionRecipe>(&R)) {
+ assert(!cast<VPWidenMemoryInstructionRecipe>(&R)->isStore() && "stores cannot be narrowed");
+ continue;
+ }
+
// Shrink operands by introducing truncates as needed.
unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
@@ -979,17 +998,6 @@ void VPlanTransforms::truncateToMinimalBitwidths(
}
}
- // Any wrapping introduced by shrinking this operation shouldn't be
- // considered undefined behavior. So, we can't unconditionally copy
- // arithmetic wrapping flags to VPW.
- if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
- VPW->dropPoisonGeneratingFlags();
-
- // Extend result to original width.
- auto *Ext = new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
- Ext->insertAfter(&R);
- ResultVPV->replaceAllUsesWith(Ext);
- Ext->setOperand(0, ResultVPV);
}
}
@@ -1130,7 +1138,7 @@ void VPlanTransforms::addActiveLaneMask(
"Must have widened canonical IV when tail folding!");
auto *WideCanonicalIV =
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
- VPRecipeBase *LaneMask;
+ VPSingleDefRecipe *LaneMask;
if (UseActiveLaneMaskForControlFlow) {
LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
Plan, DataAndControlFlowWithoutRuntimeCheck);
@@ -1155,7 +1163,7 @@ void VPlanTransforms::addActiveLaneMask(
assert(CompareToReplace->getOperand(0) == WideCanonicalIV &&
"WidenCanonicalIV must be the first operand of the compare");
- CompareToReplace->replaceAllUsesWith(LaneMask->getVPSingleValue());
+ CompareToReplace->replaceAllUsesWith(LaneMask);
CompareToReplace->eraseFromParent();
}
}