Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  161
1 file changed, 65 insertions(+), 96 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d11f4146b590..3290439ecd07 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -632,13 +632,6 @@ protected:
Instruction *EntryVal, VPValue *Def,
VPTransformState &State);
- /// Returns true if an instruction \p I should be scalarized instead of
- /// vectorized for the chosen vectorization factor.
- bool shouldScalarizeInstruction(Instruction *I) const;
-
- /// Returns true if we should generate a scalar version of \p IV.
- bool needsScalarInduction(Instruction *IV) const;
-
/// Returns (and creates if needed) the original loop trip count.
Value *getOrCreateTripCount(Loop *NewLoop);
@@ -2479,21 +2472,6 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
VecInd->addIncoming(LastInduction, LoopVectorLatch);
}
-bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
- return Cost->isScalarAfterVectorization(I, VF) ||
- Cost->isProfitableToScalarize(I, VF);
-}
-
-bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
- if (shouldScalarizeInstruction(IV))
- return true;
- auto isScalarInst = [&](User *U) -> bool {
- auto *I = cast<Instruction>(U);
- return (OrigLoop->contains(I) && shouldScalarizeInstruction(I));
- };
- return llvm::any_of(IV->users(), isScalarInst);
-}
-
void InnerLoopVectorizer::widenIntOrFpInduction(
PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State,
Value *CanonicalIV) {
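
The two helpers deleted above folded a pair of cost-model queries into per-instruction decisions made at codegen time; the rest of this patch moves that decision to recipe-construction time. A minimal standalone sketch of the logic they encoded (CostModelStub and the integer instruction ids are hypothetical stand-ins, not LLVM's API):

  #include <algorithm>
  #include <vector>

  struct CostModelStub {
    // Hypothetical stand-ins for isScalarAfterVectorization and
    // isProfitableToScalarize.
    bool scalarAfterVec(int InstrId) const { return InstrId % 2 == 0; }
    bool profitableToScalarize(int InstrId) const { return InstrId > 10; }
  };

  // Mirrors shouldScalarizeInstruction: either query suffices.
  static bool shouldScalarize(const CostModelStub &CM, int InstrId) {
    return CM.scalarAfterVec(InstrId) || CM.profitableToScalarize(InstrId);
  }

  // Mirrors needsScalarInduction: the IV itself, or any in-loop user,
  // being scalarized forces a scalar version of the IV.
  static bool needsScalarIV(const CostModelStub &CM, int IV,
                            const std::vector<int> &InLoopUsers) {
    if (shouldScalarize(CM, IV))
      return true;
    return std::any_of(InLoopUsers.begin(), InLoopUsers.end(),
                       [&](int U) { return shouldScalarize(CM, U); });
  }
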
@@ -2549,27 +2527,6 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
return ScalarIV;
};
- // Create the vector values from the scalar IV, in the absence of creating a
- // vector IV.
- auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
- Value *Broadcasted = getBroadcastInstrs(ScalarIV);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *StartIdx;
- if (Step->getType()->isFloatingPointTy())
- StartIdx =
- getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part);
- else
- StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part);
-
- Value *EntryPart =
- getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(),
- State.VF, State.Builder);
- State.set(Def, EntryPart, Part);
- if (Trunc)
- addMetadata(EntryPart, Trunc);
- }
- };
-
// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
@@ -2605,36 +2562,18 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
return;
}
- // Determine if we want a scalar version of the induction variable. This is
- // true if the induction variable itself is not widened, or if it has at
- // least one user in the loop that is not widened.
- auto NeedsScalarIV = needsScalarInduction(EntryVal);
- if (!NeedsScalarIV) {
+ // Create a new independent vector induction variable, if one is needed.
+ if (Def->needsVectorIV())
createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
- return;
- }
- // Try to create a new independent vector induction variable. If we can't
- // create the phi node, we will splat the scalar induction variable in each
- // loop iteration.
- if (!shouldScalarizeInstruction(EntryVal)) {
- createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
- Value *ScalarIV = CreateScalarIV(Step);
+ if (Def->needsScalarIV()) {
// Create scalar steps that can be used by instructions we will later
// scalarize. Note that the addition of the scalar steps will not increase
// the number of instructions in the loop in the common case prior to
// InstCombine. We will be trading one vector extract for each scalar step.
+ Value *ScalarIV = CreateScalarIV(Step);
buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
- return;
}
-
- // All IV users are scalar instructions, so only emit a scalar IV, not a
- // vectorised IV. Except when we tail-fold, then the splat IV feeds the
- // predicate used by the masked loads/stores.
- Value *ScalarIV = CreateScalarIV(Step);
- if (!Cost->isScalarEpilogueAllowed())
- CreateSplatIV(ScalarIV, Step);
- buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
}
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
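
With the flags precomputed on the recipe, the tail of widenIntOrFpInduction reduces to two independent branches rather than the three-way control flow it replaces. A minimal sketch of the resulting shape (InductionRecipeStub and the callbacks are hypothetical, not the real VPWidenIntOrFpInductionRecipe interface):

  // The recipe carries needsVectorIV / needsScalarIV computed during
  // planning, so codegen just branches on them instead of re-querying
  // the cost model.
  struct InductionRecipeStub {
    bool NeedsVectorIV;
    bool NeedsScalarIV;
  };

  template <typename MakeVectorFn, typename MakeScalarFn>
  void widenInduction(const InductionRecipeStub &Def, MakeVectorFn MakeVector,
                      MakeScalarFn MakeScalar) {
    if (Def.NeedsVectorIV)
      MakeVector(); // createVectorIntOrFpInductionPHI in the real code
    if (Def.NeedsScalarIV)
      MakeScalar(); // CreateScalarIV + buildScalarSteps in the real code
  }
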
@@ -2663,17 +2602,15 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
}
// Determine the number of scalars we need to generate for each unroll
- // iteration. If EntryVal is uniform, we only need to generate the first
- // lane. Otherwise, we generate all VF values.
- bool IsUniform =
- Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), State.VF);
- unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue();
+ // iteration.
+ bool FirstLaneOnly = vputils::onlyFirstLaneUsed(Def);
+ unsigned Lanes = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
// Compute the scalar steps and save the results in State.
Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
ScalarIVTy->getScalarSizeInBits());
Type *VecIVTy = nullptr;
Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
- if (!IsUniform && State.VF.isScalable()) {
+ if (!FirstLaneOnly && State.VF.isScalable()) {
VecIVTy = VectorType::get(ScalarIVTy, State.VF);
UnitStepVec =
Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
@@ -2684,7 +2621,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
- if (!IsUniform && State.VF.isScalable()) {
+ if (!FirstLaneOnly && State.VF.isScalable()) {
auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
if (ScalarIVTy->isFloatingPointTy())
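
The renamed flag feeds the same lane-count logic as before: one lane per unroll part when only the first lane is used, otherwise the full known-minimum lane count, with scalable VFs taking the splat-and-stepvector path shown above. For a fixed-width VF, the scalar-step arithmetic amounts to the following (a standalone model assuming an integer IV with a constant step; this buildScalarSteps is a hypothetical simplification):

  #include <cstdio>

  // Lane L of unroll part P receives IV + (P*VF + L) * Step.
  static void buildScalarSteps(long ScalarIV, long Step, unsigned VF,
                               unsigned UF, bool FirstLaneOnly) {
    unsigned Lanes = FirstLaneOnly ? 1 : VF;
    for (unsigned Part = 0; Part < UF; ++Part) {
      long StartIdx0 = (long)Part * VF; // createStepForVF for a fixed VF
      for (unsigned Lane = 0; Lane < Lanes; ++Lane)
        std::printf("part %u lane %u: %ld\n", Part, Lane,
                    ScalarIV + (StartIdx0 + Lane) * Step);
    }
  }
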
@@ -4565,7 +4502,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
- bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF);
+ bool IsUniform = vputils::onlyFirstLaneUsed(PhiR);
assert((IsUniform || !State.VF.isScalable()) &&
"Cannot scalarize a scalable VF");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
@@ -5889,7 +5826,9 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
// consider interleaving beneficial (eg. MVE).
if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
return false;
- if (VF.getFixedValue() >= EpilogueVectorizationMinVF)
+ // FIXME: We should consider changing the threshold for scalable
+ // vectors to take VScaleForTuning into account.
+ if (VF.getKnownMinValue() >= EpilogueVectorizationMinVF)
return true;
return false;
}
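
The switch from getFixedValue() to getKnownMinValue() matters because the former is only meaningful for fixed-width element counts (and asserts otherwise), while the latter is defined for both; e.g. <vscale x 4 x i32> has a known minimum of 4. A simplified model of the distinction (ElementCountStub is a stand-in, not llvm::ElementCount):

  #include <cassert>

  struct ElementCountStub {
    unsigned MinVal;
    bool Scalable;
    unsigned getKnownMinValue() const { return MinVal; }
    unsigned getFixedValue() const {
      assert(!Scalable && "fixed element count requested for scalable VF");
      return MinVal;
    }
  };

  static bool epilogueProfitable(const ElementCountStub &VF, unsigned MinVF) {
    // Mirrors the threshold check above; now safe for scalable VFs.
    return VF.getKnownMinValue() >= MinVF;
  }
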
@@ -5940,29 +5879,21 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
return Result;
}
- auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
- if (MainLoopVF.isScalable())
- LLVM_DEBUG(
- dbgs() << "LEV: Epilogue vectorization using scalable vectors not "
- "yet supported. Converting to fixed-width (VF="
- << FixedMainLoopVF << ") instead\n");
-
- if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) {
+ if (!isEpilogueVectorizationProfitable(MainLoopVF)) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
"this loop\n");
return Result;
}
for (auto &NextVF : ProfitableVFs)
- if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) &&
- (Result.Width.getFixedValue() == 1 ||
- isMoreProfitable(NextVF, Result)) &&
+ if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+ (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
LVP.hasPlanWithVF(NextVF.Width))
Result = NextVF;
if (Result != VectorizationFactor::Disabled())
LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "
- << Result.Width.getFixedValue() << "\n";);
+ << Result.Width << "\n";);
return Result;
}
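
Epilogue candidates are now filtered against the possibly scalable main-loop VF directly, which relies on ElementCount::isKnownLT being conservative across fixed/scalable mixes. A simplified model of that comparison under the assumption vscale >= 1 (ECStub is hypothetical):

  struct ECStub {
    unsigned Min;
    bool Scalable;
  };

  static bool isKnownLT(const ECStub &A, const ECStub &B) {
    // A fixed LHS, or a scalable RHS, lets the known-min comparison
    // decide (since vscale >= 1); a scalable LHS against a fixed RHS
    // is undecidable at compile time, so answer conservatively.
    if (!A.Scalable || B.Scalable)
      return A.Min < B.Min;
    return false;
  }
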
@@ -8546,16 +8477,54 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
Mask, Consecutive, Reverse);
}
-VPWidenIntOrFpInductionRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi,
- ArrayRef<VPValue *> Operands) const {
+static VPWidenIntOrFpInductionRecipe *
+createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
+ VPValue *Start, const InductionDescriptor &IndDesc,
+ LoopVectorizationCostModel &CM, Loop &OrigLoop,
+ VFRange &Range) {
+ // Returns true if an instruction \p I should be scalarized instead of
+ // vectorized for the chosen vectorization factor.
+ auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) {
+ return CM.isScalarAfterVectorization(I, VF) ||
+ CM.isProfitableToScalarize(I, VF);
+ };
+
+ bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ // Returns true if we should generate a scalar version of \p IV.
+ if (ShouldScalarizeInstruction(PhiOrTrunc, VF))
+ return true;
+ auto isScalarInst = [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return OrigLoop.contains(I) && ShouldScalarizeInstruction(I, VF);
+ };
+ return any_of(PhiOrTrunc->users(), isScalarInst);
+ },
+ Range);
+ bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ return ShouldScalarizeInstruction(PhiOrTrunc, VF);
+ },
+ Range);
+ assert(IndDesc.getStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+ if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI,
+ NeedsScalarIV, !NeedsScalarIVOnly);
+ }
+ assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV,
+ !NeedsScalarIVOnly);
+}
+
+VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
+ PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const {
+
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
- if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) {
- assert(II->getStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
- return new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], *II);
- }
+ if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
+ return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop,
+ Range);
return nullptr;
}
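
Both induction flags above are derived through getDecisionAndClampRange, which evaluates a per-VF predicate and shrinks the plan's VF range so that a single answer holds for every VF left in it. A minimal standalone model of that pattern (VFRangeStub and plain unsigned VFs are simplifications of the real VFRange/ElementCount machinery):

  #include <functional>

  struct VFRangeStub {
    unsigned Start; // inclusive; a power of two >= 1 in the real code
    unsigned End;   // exclusive
  };

  static bool getDecisionAndClampRange(
      const std::function<bool(unsigned)> &Predicate, VFRangeStub &Range) {
    bool Decision = Predicate(Range.Start);
    for (unsigned VF = Range.Start * 2; VF < Range.End; VF *= 2)
      if (Predicate(VF) != Decision)
        Range.End = VF; // clamp: VFs from here on disagree
    return Decision;
  }
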
@@ -8583,7 +8552,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
auto *Phi = cast<PHINode>(I->getOperand(0));
const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I);
+ return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range);
}
return nullptr;
}
@@ -8865,7 +8834,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
if (auto Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
return tryToBlend(Phi, Operands, Plan);
- if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands)))
+ if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
return toVPRecipeResult(Recipe);
VPHeaderPHIRecipe *PhiRecipe = nullptr;