diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 35 |
1 files changed, 32 insertions, 3 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3290439ecd07..21c16f07e237 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1701,6 +1701,11 @@ public: private: unsigned NumPredStores = 0; + /// Convenience function that returns the value of vscale_range iff + /// vscale_range.min == vscale_range.max or otherwise returns the value + /// returned by the corresponding TLI method. + Optional<unsigned> getVScaleForTuning() const; + /// \return An upper bound for the vectorization factors for both /// fixed and scalable vectorization, where the minimum-known number of /// elements is a power-of-2 larger than zero. If scalable vectorization is @@ -5600,6 +5605,18 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget( return MaxVF; } +Optional<unsigned> LoopVectorizationCostModel::getVScaleForTuning() const { + if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) { + auto Attr = TheFunction->getFnAttribute(Attribute::VScaleRange); + auto Min = Attr.getVScaleRangeMin(); + auto Max = Attr.getVScaleRangeMax(); + if (Max && Min == Max) + return Max; + } + + return TTI.getVScaleForTuning(); +} + bool LoopVectorizationCostModel::isMoreProfitable( const VectorizationFactor &A, const VectorizationFactor &B) const { InstructionCost CostA = A.Cost; @@ -5624,7 +5641,7 @@ bool LoopVectorizationCostModel::isMoreProfitable( // Improve estimate for the vector width if it is scalable. unsigned EstimatedWidthA = A.Width.getKnownMinValue(); unsigned EstimatedWidthB = B.Width.getKnownMinValue(); - if (Optional<unsigned> VScale = TTI.getVScaleForTuning()) { + if (Optional<unsigned> VScale = getVScaleForTuning()) { if (A.Width.isScalable()) EstimatedWidthA *= VScale.getValue(); if (B.Width.isScalable()) @@ -5673,7 +5690,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( #ifndef NDEBUG unsigned AssumedMinimumVscale = 1; - if (Optional<unsigned> VScale = TTI.getVScaleForTuning()) + if (Optional<unsigned> VScale = getVScaleForTuning()) AssumedMinimumVscale = VScale.getValue(); unsigned Width = Candidate.Width.isScalable() @@ -5885,8 +5902,20 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor( return Result; } + // If MainLoopVF = vscale x 2, and vscale is expected to be 4, then we know + // the main loop handles 8 lanes per iteration. We could still benefit from + // vectorizing the epilogue loop with VF=4. + ElementCount EstimatedRuntimeVF = MainLoopVF; + if (MainLoopVF.isScalable()) { + EstimatedRuntimeVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue()); + if (Optional<unsigned> VScale = getVScaleForTuning()) + EstimatedRuntimeVF *= VScale.getValue(); + } + for (auto &NextVF : ProfitableVFs) - if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) && + if (((!NextVF.Width.isScalable() && MainLoopVF.isScalable() && + ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) || + ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) && (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) && LVP.hasPlanWithVF(NextVF.Width)) Result = NextVF; |