From d545c2ce5ad1891282e8818b47ffe557c76a86b4 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 21 Aug 2021 23:27:36 +0200 Subject: Vendor import of llvm-project branch release/13.x llvmorg-13.0.0-rc1-97-g23ba3732246a. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 46 +++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f24ae6b100d5..671bc6b5212b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5433,6 +5433,21 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { // lane 0 demanded or b) are uses which demand only lane 0 of their operand. for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { + if (IntrinsicInst *II = dyn_cast(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::sideeffect: + case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::assume: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + if (TheLoop->hasLoopInvariantOperands(&I)) + addToWorklistIfAllowed(&I); + break; + default: + break; + } + } + // If there's no pointer operand, there's nothing to do. auto *Ptr = getLoadStorePointerOperand(&I); if (!Ptr) @@ -8916,6 +8931,37 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( [&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range); + // Even if the instruction is not marked as uniform, there are certain + // intrinsic calls that can be effectively treated as such, so we check for + // them here. Conservatively, we only do this for scalable vectors, since + // for fixed-width VFs we can always fall back on full scalarization. + if (!IsUniform && Range.Start.isScalable() && isa(I)) { + switch (cast(I)->getIntrinsicID()) { + case Intrinsic::assume: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // For scalable vectors if one of the operands is variant then we still + // want to mark as uniform, which will generate one instruction for just + // the first lane of the vector. We can't scalarize the call in the same + // way as for fixed-width vectors because we don't know how many lanes + // there are. + // + // The reasons for doing it this way for scalable vectors are: + // 1. For the assume intrinsic generating the instruction for the first + // lane is still be better than not generating any at all. For + // example, the input may be a splat across all lanes. + // 2. For the lifetime start/end intrinsics the pointer operand only + // does anything useful when the input comes from a stack object, + // which suggests it should always be uniform. For non-stack objects + // the effect is to poison the object, which still allows us to + // remove the call. + IsUniform = true; + break; + default: + break; + } + } + auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), IsUniform, IsPredicated); setRecipe(I, Recipe); -- cgit v1.2.3