diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 019a09665a67..e136cd9aedac 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2637,7 +2637,7 @@ private: AliasCacheKey key = std::make_pair(Inst1, Inst2); Optional<bool> &result = AliasCache[key]; if (result) { - return result.getValue(); + return result.value(); } bool aliased = true; if (Loc1.Ptr && isSimple(Inst1)) @@ -4592,7 +4592,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, }; InstructionsState S = getSameOpcode(VL); - if (Depth == RecursionMaxDepth) { + + // Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of + // a load), in which case peek through to include it in the tree, without + // ballooning over-budget. + if (Depth >= RecursionMaxDepth && + !(S.MainOp && isa<Instruction>(S.MainOp) && S.MainOp == S.AltOp && + VL.size() >= 4 && + (match(S.MainOp, m_Load(m_Value())) || all_of(VL, [&S](const Value *I) { + return match(I, + m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) && + cast<Instruction>(I)->getOpcode() == + cast<Instruction>(S.MainOp)->getOpcode(); + })))) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); if (TryToFindDuplicates(S)) newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, @@ -11217,7 +11229,7 @@ public: return OptimizationRemarkMissed( SV_NAME, "HorSLPNotBeneficial", ReducedValsToOps.find(VL[0])->second.front()) - << "Vectorizing horizontal reduction is possible" + << "Vectorizing horizontal reduction is possible " << "but not beneficial with cost " << ore::NV("Cost", Cost) << " and threshold " << ore::NV("Threshold", -SLPCostThreshold); |