author    Dimitry Andric <dim@FreeBSD.org>  2021-08-21 21:27:36 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2021-08-21 21:27:36 +0000
commit    d545c2ce5ad1891282e8818b47ffe557c76a86b4 (patch)
tree      98c4a1af94aa0ecc49fb4192ac42564bbe3dc3fd /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent    9cb5bdb8b26e2207293f0fb56701c4a0ff64a47d (diff)
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 46
1 file changed, 46 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f24ae6b100d5..671bc6b5212b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5433,6 +5433,21 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// lane 0 demanded or b) are uses which demand only lane 0 of their operand.
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sideeffect:
+ case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ if (TheLoop->hasLoopInvariantOperands(&I))
+ addToWorklistIfAllowed(&I);
+ break;
+ default:
+ break;
+ }
+ }
+
// If there's no pointer operand, there's nothing to do.
auto *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
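
For readers following the first hunk outside the diff, here is a minimal self-contained sketch of the check it adds, using a hypothetical helper name (isUniformEligibleIntrinsic is not part of the patch): the instruction must be one of the listed bookkeeping intrinsics and all of its operands must be loop-invariant before it is handed to addToWorklistIfAllowed().

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// Hypothetical helper, for illustration only: mirrors the check added above.
static bool isUniformEligibleIntrinsic(const Instruction &I, const Loop &L) {
  const auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;
  switch (II->getIntrinsicID()) {
  case Intrinsic::sideeffect:
  case Intrinsic::experimental_noalias_scope_decl:
  case Intrinsic::assume:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
    // Treat the call as uniform only when every operand is defined outside
    // the loop, so one call per vector iteration behaves the same as one
    // call per lane.
    return L.hasLoopInvariantOperands(&I);
  default:
    return false;
  }
}
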
@@ -8916,6 +8931,37 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range);
+ // Even if the instruction is not marked as uniform, there are certain
+ // intrinsic calls that can be effectively treated as such, so we check for
+ // them here. Conservatively, we only do this for scalable vectors, since
+ // for fixed-width VFs we can always fall back on full scalarization.
+ if (!IsUniform && Range.Start.isScalable() && isa<IntrinsicInst>(I)) {
+ switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // For scalable vectors, if one of the operands is variant then we still
+ // want to mark the call as uniform, which will generate one instruction
+ // for just the first lane of the vector. We can't scalarize the call in
+ // the same way as for fixed-width vectors because we don't know how many
+ // lanes there are.
+ //
+ // The reasons for doing it this way for scalable vectors are:
+ // 1. For the assume intrinsic, generating the instruction for the first
+ // lane is still better than not generating any at all. For
+ // example, the input may be a splat across all lanes.
+ // 2. For the lifetime start/end intrinsics the pointer operand only
+ // does anything useful when the input comes from a stack object,
+ // which suggests it should always be uniform. For non-stack objects
+ // the effect is to poison the object, which still allows us to
+ // remove the call.
+ IsUniform = true;
+ break;
+ default:
+ break;
+ }
+ }
+
auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
IsUniform, IsPredicated);
setRecipe(I, Recipe);
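
The second hunk makes the same kind of per-intrinsic decision at recipe-construction time, but only for scalable VFs. Below is a minimal sketch of that decision in isolation, assuming a hypothetical helper name (forceUniformForScalableVF) rather than the inline switch the patch actually uses: with a scalable VF the lane count is unknown at compile time, so instead of scalarizing per lane, the call is marked uniform and emitted once for the first lane.

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

// Hypothetical helper, for illustration only: returns the adjusted IsUniform
// flag used when building the VPReplicateRecipe.
static bool forceUniformForScalableVF(const Instruction *I, ElementCount VF,
                                      bool IsUniform) {
  // Nothing to do if the recipe is already uniform or the VF is fixed-width;
  // fixed-width VFs can always fall back on full scalarization.
  if (IsUniform || !VF.isScalable())
    return IsUniform;
  const auto *II = dyn_cast<IntrinsicInst>(I);
  if (!II)
    return IsUniform;
  switch (II->getIntrinsicID()) {
  case Intrinsic::assume:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
    // Emit a single call for just the first lane of the scalable vector.
    return true;
  default:
    return IsUniform;
  }
}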