diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 123 |
1 files changed, 52 insertions, 71 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9d799124074c..32913b3f5569 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3760,40 +3760,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { OrdersType CurrentOrder(NumScalars, NumScalars); SmallVector<int> Positions; SmallBitVector UsedPositions(NumScalars); - DenseMap<const TreeEntry *, unsigned> UsedEntries; - DenseMap<Value *, std::pair<const TreeEntry *, unsigned>> ValueToEntryPos; - for (Value *V : TE.Scalars) { - if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V)) - continue; - const auto *LocalSTE = getTreeEntry(V); - if (!LocalSTE) - continue; - unsigned Lane = - std::distance(LocalSTE->Scalars.begin(), find(LocalSTE->Scalars, V)); - if (Lane >= NumScalars) - continue; - ++UsedEntries.try_emplace(LocalSTE, 0).first->getSecond(); - ValueToEntryPos.try_emplace(V, LocalSTE, Lane); - } - if (UsedEntries.empty()) - return std::nullopt; - const TreeEntry &BestSTE = - *std::max_element(UsedEntries.begin(), UsedEntries.end(), - [](const std::pair<const TreeEntry *, unsigned> &P1, - const std::pair<const TreeEntry *, unsigned> &P2) { - return P1.second < P2.second; - }) - ->first; - UsedEntries.erase(&BestSTE); - const TreeEntry *SecondBestSTE = nullptr; - if (!UsedEntries.empty()) - SecondBestSTE = - std::max_element(UsedEntries.begin(), UsedEntries.end(), - [](const std::pair<const TreeEntry *, unsigned> &P1, - const std::pair<const TreeEntry *, unsigned> &P2) { - return P1.second < P2.second; - }) - ->first; + const TreeEntry *STE = nullptr; // Try to find all gathered scalars that are gets vectorized in other // vectorize node. Here we can have only one single tree vector node to // correctly identify order of the gathered scalars. @@ -3801,46 +3768,53 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { Value *V = TE.Scalars[I]; if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V)) continue; - const auto [LocalSTE, Lane] = ValueToEntryPos.lookup(V); - if (!LocalSTE || (LocalSTE != &BestSTE && LocalSTE != SecondBestSTE)) - continue; - if (CurrentOrder[Lane] != NumScalars) { - if ((CurrentOrder[Lane] >= BestSTE.Scalars.size() || - BestSTE.Scalars[CurrentOrder[Lane]] == V) && - (Lane != I || LocalSTE == SecondBestSTE)) - continue; - UsedPositions.reset(CurrentOrder[Lane]); + if (const auto *LocalSTE = getTreeEntry(V)) { + if (!STE) + STE = LocalSTE; + else if (STE != LocalSTE) + // Take the order only from the single vector node. + return std::nullopt; + unsigned Lane = + std::distance(STE->Scalars.begin(), find(STE->Scalars, V)); + if (Lane >= NumScalars) + return std::nullopt; + if (CurrentOrder[Lane] != NumScalars) { + if (Lane != I) + continue; + UsedPositions.reset(CurrentOrder[Lane]); + } + // The partial identity (where only some elements of the gather node are + // in the identity order) is good. + CurrentOrder[Lane] = I; + UsedPositions.set(I); } - // The partial identity (where only some elements of the gather node are - // in the identity order) is good. - CurrentOrder[Lane] = I; - UsedPositions.set(I); } // Need to keep the order if we have a vector entry and at least 2 scalars or // the vectorized entry has just 2 scalars. - if (BestSTE.Scalars.size() != 2 && UsedPositions.count() <= 1) - return std::nullopt; - auto IsIdentityOrder = [&](ArrayRef<unsigned> CurrentOrder) { - for (unsigned I = 0; I < NumScalars; ++I) - if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars) - return false; - return true; - }; - if (IsIdentityOrder(CurrentOrder)) - return OrdersType(); - auto *It = CurrentOrder.begin(); - for (unsigned I = 0; I < NumScalars;) { - if (UsedPositions.test(I)) { - ++I; - continue; - } - if (*It == NumScalars) { - *It = I; - ++I; + if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) { + auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) { + for (unsigned I = 0; I < NumScalars; ++I) + if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars) + return false; + return true; + }; + if (IsIdentityOrder(CurrentOrder)) + return OrdersType(); + auto *It = CurrentOrder.begin(); + for (unsigned I = 0; I < NumScalars;) { + if (UsedPositions.test(I)) { + ++I; + continue; + } + if (*It == NumScalars) { + *It = I; + ++I; + } + ++It; } - ++It; + return std::move(CurrentOrder); } - return std::move(CurrentOrder); + return std::nullopt; } namespace { @@ -6469,7 +6443,7 @@ bool BoUpSLP::areAllUsersVectorized( Instruction *I, const SmallDenseSet<Value *> *VectorizedVals) const { return (I->hasOneUse() && (!VectorizedVals || VectorizedVals->contains(I))) || all_of(I->users(), [this](User *U) { - return ScalarToTreeEntry.count(U) > 0 || + return ScalarToTreeEntry.contains(U) || isVectorLikeInstWithConstOps(U) || (isa<ExtractElementInst>(U) && MustGather.contains(U)); }); @@ -11498,7 +11472,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *V = Builder.CreateBinOp( static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS); - propagateIRFlags(V, E->Scalars, VL0); + propagateIRFlags(V, E->Scalars, VL0, !MinBWs.contains(E)); if (auto *I = dyn_cast<Instruction>(V)) V = propagateMetadata(I, E->Scalars); @@ -15730,6 +15704,8 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI, assert(isValidElementType(V->getType()) && isValidElementType(V2->getType()) && "Expected valid element types only."); + if (V == V2) + return IsCompatibility; auto *CI1 = cast<CmpInst>(V); auto *CI2 = cast<CmpInst>(V2); if (CI1->getOperand(0)->getType()->getTypeID() < @@ -15754,6 +15730,8 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI, for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) { auto *Op1 = CI1->getOperand(CI1Preds ? I : E - I - 1); auto *Op2 = CI2->getOperand(CI2Preds ? I : E - I - 1); + if (Op1 == Op2) + continue; if (Op1->getValueID() < Op2->getValueID()) return !IsCompatibility; if (Op1->getValueID() > Op2->getValueID()) @@ -15780,7 +15758,10 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI, InstructionsState S = getSameOpcode({I1, I2}, TLI); if (S.getOpcode() && (IsCompatibility || !S.isAltShuffle())) continue; - return !IsCompatibility && I1->getOpcode() < I2->getOpcode(); + if (IsCompatibility) + return false; + if (I1->getOpcode() != I2->getOpcode()) + return I1->getOpcode() < I2->getOpcode(); } } return IsCompatibility; |