Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms'):

 contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 217
 contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp                        |  83
 contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp                  |  82
 contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp            |   2
 contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp      |  14
 contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp               |   4
 contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp                |   2
 contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp                        |   2
 contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp                      | 256
 contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp             |   6
 contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp                      |  14
 contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp                     |  38
 12 files changed, 198 insertions(+), 522 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 34c8a380448e..503ce019dc84 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -29,7 +28,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -398,6 +396,54 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
return true;
}
+/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
+/// pessimistic codegen that has to account for setting errno and can enable
+/// vectorization.
+static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI, AssumptionCache &AC,
+ DominatorTree &DT) {
+ // Match a call to sqrt mathlib function.
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ return false;
+
+ Module *M = Call->getModule();
+ LibFunc Func;
+ if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
+ return false;
+
+ if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
+ return false;
+
+ // If (1) this is a sqrt libcall, (2) we can assume that NaN is not created
+ // (because of NNAN or because the operand cannot be ordered less than -0.0),
+ // and (3) we would not end up lowering to a libcall anyway (which could
+ // change the value of errno), then:
+ // (1) errno won't be set.
+ // (2) it is safe to convert this to an intrinsic call.
+ Type *Ty = Call->getType();
+ Value *Arg = Call->getArgOperand(0);
+ if (TTI.haveFastSqrt(Ty) &&
+ (Call->hasNoNaNs() ||
+ cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I,
+ &DT))) {
+ IRBuilder<> Builder(&I);
+ IRBuilderBase::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(Call->getFastMathFlags());
+
+ Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
+ Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
+ I.replaceAllUsesWith(NewSqrt);
+
+ // Explicitly erase the old call because a call with side effects is not
+ // trivially dead.
+ I.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
// Check if this array of constants represents a cttz table.
// Iterate over the elements from \p Table by trying to find/match all
// the numbers from 0 to \p InputBits that should represent cttz results.
@@ -869,159 +915,13 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
return true;
}
-/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
-/// pessimistic codegen that has to account for setting errno and can enable
-/// vectorization.
-static bool foldSqrt(CallInst *Call, TargetTransformInfo &TTI,
- TargetLibraryInfo &TLI, AssumptionCache &AC,
- DominatorTree &DT) {
- Module *M = Call->getModule();
-
- // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
- // (because NNAN or the operand arg must not be less than -0.0) and (2) we
- // would not end up lowering to a libcall anyway (which could change the value
- // of errno), then:
- // (1) errno won't be set.
- // (2) it is safe to convert this to an intrinsic call.
- Type *Ty = Call->getType();
- Value *Arg = Call->getArgOperand(0);
- if (TTI.haveFastSqrt(Ty) &&
- (Call->hasNoNaNs() ||
- cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, Call,
- &DT))) {
- IRBuilder<> Builder(Call);
- IRBuilderBase::FastMathFlagGuard Guard(Builder);
- Builder.setFastMathFlags(Call->getFastMathFlags());
-
- Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
- Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
- Call->replaceAllUsesWith(NewSqrt);
-
- // Explicitly erase the old call because a call with side effects is not
- // trivially dead.
- Call->eraseFromParent();
- return true;
- }
-
- return false;
-}
-
-/// Try to expand strcmp(P, "x") calls.
-static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
-
- // Trivial cases are optimized during inst combine
- if (Str1P == Str2P)
- return false;
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- Value *NonConstantP = nullptr;
- StringRef ConstantStr;
-
- if (!HasStr1 && HasStr2 && Str2.size() == 1) {
- NonConstantP = Str1P;
- ConstantStr = Str2;
- } else if (!HasStr2 && HasStr1 && Str1.size() == 1) {
- NonConstantP = Str2P;
- ConstantStr = Str1;
- } else {
- return false;
- }
-
- // Check if strcmp result is only used in a comparison with zero
- if (!isOnlyUsedInZeroComparison(CI))
- return false;
-
- // For strcmp(P, "x") do the following transformation:
- //
- // (before)
- // dst = strcmp(P, "x")
- //
- // (after)
- // v0 = P[0] - 'x'
- // [if v0 == 0]
- // v1 = P[1]
- // dst = phi(v0, v1)
- //
-
- IRBuilder<> B(CI->getParent());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
-
- Type *RetType = CI->getType();
-
- B.SetInsertPoint(CI);
- BasicBlock *InitialBB = B.GetInsertBlock();
- Value *Str1FirstCharacterValue =
- B.CreateZExt(B.CreateLoad(B.getInt8Ty(), NonConstantP), RetType);
- Value *Str2FirstCharacterValue =
- ConstantInt::get(RetType, static_cast<unsigned char>(ConstantStr[0]));
- Value *FirstCharacterSub =
- B.CreateNSWSub(Str1FirstCharacterValue, Str2FirstCharacterValue);
- Value *IsFirstCharacterSubZero =
- B.CreateICmpEQ(FirstCharacterSub, ConstantInt::get(RetType, 0));
- Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen(
- IsFirstCharacterSubZero, CI, /*Unreachable*/ false,
- /*BranchWeights*/ nullptr, &DTU);
-
- B.SetInsertPoint(IsFirstCharacterSubZeroBBTerminator);
- B.GetInsertBlock()->setName("strcmp_expand_sub_is_zero");
- BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock();
- Value *Str1SecondCharacterValue = B.CreateZExt(
- B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64(
- B.getInt8Ty(), NonConstantP, 1)),
- RetType);
-
- B.SetInsertPoint(CI);
- B.GetInsertBlock()->setName("strcmp_expand_sub_join");
-
- PHINode *Result = B.CreatePHI(RetType, 2);
- Result->addIncoming(FirstCharacterSub, InitialBB);
- Result->addIncoming(Str1SecondCharacterValue, IsFirstCharacterSubZeroBB);
-
- CI->replaceAllUsesWith(Result);
- CI->eraseFromParent();
-
- MadeCFGChange = true;
-
- return true;
-}
-
-static bool foldLibraryCalls(Instruction &I, TargetTransformInfo &TTI,
- TargetLibraryInfo &TLI, DominatorTree &DT,
- AssumptionCache &AC, bool &MadeCFGChange) {
- CallInst *CI = dyn_cast<CallInst>(&I);
- if (!CI)
- return false;
-
- LibFunc Func;
- Module *M = I.getModule();
- if (!TLI.getLibFunc(*CI, Func) || !isLibFuncEmittable(M, &TLI, Func))
- return false;
-
- switch (Func) {
- case LibFunc_sqrt:
- case LibFunc_sqrtf:
- case LibFunc_sqrtl:
- return foldSqrt(CI, TTI, TLI, AC, DT);
- case LibFunc_strcmp:
- return expandStrcmp(CI, DT, MadeCFGChange);
- default:
- break;
- }
-
- return false;
-}
-
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, AliasAnalysis &AA,
- AssumptionCache &AC, bool &MadeCFGChange) {
+ AssumptionCache &AC) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
@@ -1046,7 +946,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
// NOTE: This function introduces erasing of the instruction `I`, so it
// needs to be called at the end of this sequence, otherwise we may make
// bugs.
- MadeChange |= foldLibraryCalls(I, TTI, TLI, DT, AC, MadeCFGChange);
+ MadeChange |= foldSqrt(I, TTI, TLI, AC, DT);
}
}
@@ -1062,12 +962,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
/// handled in the callers of this function.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, DominatorTree &DT,
- AliasAnalysis &AA, bool &ChangedCFG) {
+ AliasAnalysis &AA) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, ChangedCFG);
+ MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
return MadeChange;
}
@@ -1078,21 +978,12 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
-
- bool MadeCFGChange = false;
-
- if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
+ if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}
-
// Mark all the analyses that instcombine updates as preserved.
PreservedAnalyses PA;
-
- if (MadeCFGChange)
- PA.preserve<DominatorTreeAnalysis>();
- else
- PA.preserveSet<CFGAnalyses>();
-
+ PA.preserveSet<CFGAnalyses>();
return PA;
}
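The restored foldSqrt only fires when TTI reports a fast sqrt for the type and either the call carries the nnan flag or the argument is known not to be ordered less than -0.0, because in that case the libcall can never take the domain-error path that sets errno. A minimal source-level sketch of a value that satisfies that condition (hypothetical standalone C++, not part of this patch):

// Hypothetical example: a sum of squares is never ordered less than -0.0, so
// sqrt() here cannot hit the domain error that would set errno, and the call
// is a candidate for rewriting to the llvm.sqrt intrinsic.
#include <cmath>

double distance(double dx, double dy) {
  double d2 = dx * dx + dy * dy; // cannot be ordered less than -0.0
  return std::sqrt(d2);          // eligible for the intrinsic rewrite
}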
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index d78ab1c1ea28..d0606c15f3d5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -194,12 +194,49 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
for (auto *DA : It->second)
Visited.insert(DA->getParent());
+ SmallPtrSet<const BasicBlock *, 32> EscapingBBs;
+ for (auto *U : CB->users()) {
+ // Uses from coroutine intrinsics are not a problem.
+ if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(U))
+ continue;
+
+ // Conservatively treat all other uses as potential escape candidates.
+ //
+ // Note that the major user of switch-ABI coroutines (C++) stores resume.fn,
+ // destroy.fn and the index into the coroutine frame immediately, so the
+ // block containing the coro.begin will always be considered escaping for
+ // C++. Improving the precision of this analysis therefore brings no
+ // performance benefit for C++.
+ //
+ // We still perform the check because we want switch-ABI LLVM coroutines to
+ // be as self-contained as possible, rather than merely a by-product of
+ // C++20 coroutines.
+ EscapingBBs.insert(cast<Instruction>(U)->getParent());
+ }
+
+ bool PotentiallyEscaped = false;
+
do {
const auto *BB = Worklist.pop_back_val();
if (!Visited.insert(BB).second)
continue;
- if (TIs.count(BB))
- return true;
+
+ // A path-insensitive marker used to test whether the coro.begin escapes.
+ // It is intentionally path-insensitive, and therefore possibly imprecise,
+ // because we don't want the analysis to be too slow.
+ PotentiallyEscaped |= EscapingBBs.count(BB);
+
+ if (TIs.count(BB)) {
+ if (!BB->getTerminator()->isExceptionalTerminator() || PotentiallyEscaped)
+ return true;
+
+ // If the function exits through an exceptional terminator, the memory used
+ // by the coroutine frame is released automatically by stack unwinding, so
+ // we can treat the coro.begin as not escaping when it leaves the function
+ // along an exceptional path.
+
+ continue;
+ }
// Conservatively say that there is potentially a path.
if (!--Limit)
@@ -236,36 +273,36 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
// memory location storing that value and not the virtual register.
SmallPtrSet<BasicBlock *, 8> Terminators;
- // First gather all of the non-exceptional terminators for the function.
+ // First gather all of the terminators for the function.
// Consider the final coro.suspend as the real terminator when the current
// function is a coroutine.
- for (BasicBlock &B : *F) {
- auto *TI = B.getTerminator();
- if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
- !isa<UnreachableInst>(TI))
- Terminators.insert(&B);
- }
+ for (BasicBlock &B : *F) {
+ auto *TI = B.getTerminator();
+
+ if (TI->getNumSuccessors() != 0 || isa<UnreachableInst>(TI))
+ continue;
+
+ Terminators.insert(&B);
+ }
// Filter out the coro.destroy that lie along exceptional paths.
SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
for (const auto &It : DestroyAddr) {
- // If there is any coro.destroy dominates all of the terminators for the
- // coro.begin, we could know the corresponding coro.begin wouldn't escape.
- for (Instruction *DA : It.second) {
- if (llvm::all_of(Terminators, [&](auto *TI) {
- return DT.dominates(DA, TI->getTerminator());
- })) {
- ReferencedCoroBegins.insert(It.first);
- break;
- }
- }
-
- // Whether there is any paths from coro.begin to Terminators which not pass
- // through any of the coro.destroys.
+ // If every terminator is dominated by a coro.destroy, we know the
+ // corresponding coro.begin cannot escape.
+ //
+ // Otherwise, hasEscapePath decides whether there is any path from the
+ // coro.begin to the terminators that does not pass through any of the
+ // coro.destroys.
//
// hasEscapePath is relatively slow, so we avoid running it as much as
// possible.
- if (!ReferencedCoroBegins.count(It.first) &&
+ if (llvm::all_of(Terminators,
+ [&](auto *TI) {
+ return llvm::any_of(It.second, [&](auto *DA) {
+ return DT.dominates(DA, TI->getTerminator());
+ });
+ }) ||
!hasEscapePath(It.first, Terminators))
ReferencedCoroBegins.insert(It.first);
}
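The comments above describe a deliberately path-insensitive walk: any visited block containing a non-intrinsic use of the coro.begin sets a sticky "potentially escaped" flag, and an exceptional exit only counts as an escape when that flag is already set. A standalone sketch of that walk over a hypothetical block graph (illustrative types only, not the LLVM data structures):

// Minimal sketch, assuming a toy CFG representation. Terminator blocks with a
// normal exit always report an escape; blocks that exit by unwinding only do
// so if an escaping use was seen anywhere earlier in the walk.
#include <unordered_set>
#include <vector>

struct Block {
  std::vector<Block *> Succs;
  bool HasEscapingUse = false;
  bool IsTerminator = false;
  bool IsExceptionalExit = false;
};

bool hasEscapePath(Block *Start) {
  std::vector<Block *> Worklist{Start};
  std::unordered_set<Block *> Visited;
  bool PotentiallyEscaped = false;
  while (!Worklist.empty()) {
    Block *BB = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(BB).second)
      continue;
    PotentiallyEscaped |= BB->HasEscapingUse;
    if (BB->IsTerminator) {
      if (!BB->IsExceptionalExit || PotentiallyEscaped)
        return true;
      continue; // unwinding releases the frame; not an escape on this exit
    }
    for (Block *S : BB->Succs)
      Worklist.push_back(S);
  }
  return false;
}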
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 3d6c501e4596..ac5dbc7cfb2a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -78,11 +78,6 @@ static cl::opt<unsigned> MaxClones(
"The maximum number of clones allowed for a single function "
"specialization"));
-static cl::opt<unsigned> MaxIncomingPhiValues(
- "funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc(
- "The maximum number of incoming values a PHI node can have to be "
- "considered during the specialization bonus estimation"));
-
static cl::opt<unsigned> MinFunctionSize(
"funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc(
"Don't specialize functions that have less than this number of "
@@ -109,7 +104,6 @@ static cl::opt<bool> SpecializeLiteralConstant(
// the combination of size and latency savings in comparison to the non
// specialized version of the function.
static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
- DenseSet<BasicBlock *> &DeadBlocks,
ConstMap &KnownConstants, SCCPSolver &Solver,
BlockFrequencyInfo &BFI,
TargetTransformInfo &TTI) {
@@ -124,12 +118,6 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
if (!Weight)
continue;
- // These blocks are considered dead as far as the InstCostVisitor is
- // concerned. They haven't been proven dead yet by the Solver, but
- // may become if we propagate the constant specialization arguments.
- if (!DeadBlocks.insert(BB).second)
- continue;
-
for (Instruction &I : *BB) {
// Disregard SSA copies.
if (auto *II = dyn_cast<IntrinsicInst>(&I))
@@ -164,19 +152,9 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
return nullptr;
}
-Cost InstCostVisitor::getBonusFromPendingPHIs() {
- Cost Bonus = 0;
- while (!PendingPHIs.empty()) {
- Instruction *Phi = PendingPHIs.pop_back_val();
- Bonus += getUserBonus(Phi);
- }
- return Bonus;
-}
-
Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
// Cache the iterator before visiting.
- LastVisited = Use ? KnownConstants.insert({Use, C}).first
- : KnownConstants.end();
+ LastVisited = KnownConstants.insert({Use, C}).first;
if (auto *I = dyn_cast<SwitchInst>(User))
return estimateSwitchInst(*I);
@@ -203,15 +181,13 @@ Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
for (auto *U : User->users())
if (auto *UI = dyn_cast<Instruction>(U))
- if (UI != User && Solver.isBlockExecutable(UI->getParent()))
+ if (Solver.isBlockExecutable(UI->getParent()))
Bonus += getUserBonus(UI, User, C);
return Bonus;
}
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
- assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
-
if (I.getCondition() != LastVisited->first)
return 0;
@@ -232,13 +208,10 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
WorkList.push_back(BB);
}
- return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
- TTI);
+ return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI);
}
Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
- assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
-
if (I.getCondition() != LastVisited->first)
return 0;
@@ -250,39 +223,10 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
Succ->getUniquePredecessor() == I.getParent())
WorkList.push_back(Succ);
- return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
- TTI);
-}
-
-Constant *InstCostVisitor::visitPHINode(PHINode &I) {
- if (I.getNumIncomingValues() > MaxIncomingPhiValues)
- return nullptr;
-
- bool Inserted = VisitedPHIs.insert(&I).second;
- Constant *Const = nullptr;
-
- for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
- Value *V = I.getIncomingValue(Idx);
- if (auto *Inst = dyn_cast<Instruction>(V))
- if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
- continue;
- Constant *C = findConstantFor(V, KnownConstants);
- if (!C) {
- if (Inserted)
- PendingPHIs.push_back(&I);
- return nullptr;
- }
- if (!Const)
- Const = C;
- else if (C != Const)
- return nullptr;
- }
- return Const;
+ return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI);
}
Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
- assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
-
if (isGuaranteedNotToBeUndefOrPoison(LastVisited->second))
return LastVisited->second;
return nullptr;
@@ -309,8 +253,6 @@ Constant *InstCostVisitor::visitCallBase(CallBase &I) {
}
Constant *InstCostVisitor::visitLoadInst(LoadInst &I) {
- assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
-
if (isa<ConstantPointerNull>(LastVisited->second))
return nullptr;
return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL);
@@ -333,8 +275,6 @@ Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
}
Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
- assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
-
if (I.getCondition() != LastVisited->first)
return nullptr;
@@ -350,8 +290,6 @@ Constant *InstCostVisitor::visitCastInst(CastInst &I) {
}
Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
- assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
-
bool Swap = I.getOperand(1) == LastVisited->first;
Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
Constant *Other = findConstantFor(V, KnownConstants);
@@ -365,14 +303,10 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
}
Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
- assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
-
return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL);
}
Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
- assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
-
bool Swap = I.getOperand(1) == LastVisited->first;
Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
Constant *Other = findConstantFor(V, KnownConstants);
@@ -779,17 +713,13 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
AllSpecs[Index].CallSites.push_back(&CS);
} else {
// Calculate the specialisation gain.
- Cost Score = 0;
+ Cost Score = 0 - SpecCost;
InstCostVisitor Visitor = getInstCostVisitorFor(F);
for (ArgInfo &A : S.Args)
Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
- Score += Visitor.getBonusFromPendingPHIs();
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = "
- << Score << "\n");
// Discard unprofitable specialisations.
- if (!ForceSpecialization && Score <= SpecCost)
+ if (!ForceSpecialization && Score <= 0)
continue;
// Create a new specialisation entry.
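With this change the score starts at minus the specialization cost and each constant argument contributes its bonus, so a specialization survives only when the accumulated bonuses outweigh the cost (unless specialization is forced). A toy illustration of the decision with made-up numbers (hypothetical values only; the real pass derives SpecCost and the bonuses from TTI/BFI-based estimates):

#include <cstdio>

int main() {
  long SpecCost = 120;       // assumed cost of emitting the clone
  long Score = 0 - SpecCost; // start in the red, as in the patch
  Score += 80;               // assumed bonus for the first constant argument
  Score += 90;               // assumed bonus for the second constant argument
  bool Keep = Score > 0;     // 50 > 0: profitable, keep the specialization
  std::printf("score=%ld keep=%d\n", Score, Keep);
  return 0;
}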
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index afd6e034f46d..767b7c7defbb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -906,7 +906,7 @@ InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
bool IsCastOpRHS = (CastOp == RHS);
- bool IsZExt = isa<ZExtInst>(CastOp);
+ bool IsZExt = isa<ZExtOperator>(CastOp);
Constant *C;
if (IsTrueArm) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 3e3be536defc..597cec8e61c9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1777,6 +1777,20 @@ void CHR::cloneScopeBlocks(CHRScope *Scope,
BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
NewBlocks.push_back(NewBB);
VMap[BB] = NewBB;
+
+ // Unreachable predecessors will not be cloned and will not have an edge
+ // to the cloned block. As such, also remove them from any phi nodes.
+ // To avoid iterator invalidation, first collect the dead predecessors
+ // from the first phi node, and then perform the actual removal.
+ if (auto *FirstPN = dyn_cast<PHINode>(NewBB->begin())) {
+ SmallVector<BasicBlock *> DeadPreds;
+ for (BasicBlock *Pred : FirstPN->blocks())
+ if (!DT.isReachableFromEntry(Pred))
+ DeadPreds.push_back(Pred);
+ for (PHINode &PN : make_early_inc_range(NewBB->phis()))
+ for (BasicBlock *Pred : DeadPreds)
+ PN.removeIncomingValue(Pred);
+ }
}
// Place the cloned blocks right after the original blocks (right before the
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 21f0b1a92293..75adcabc0d34 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -898,7 +898,9 @@ bool GCOVProfiler::emitProfileNotes(
if (Line == Loc.getLine()) continue;
Line = Loc.getLine();
- if (SP != getDISubprogram(Loc.getScope()))
+ MDNode *Scope = Loc.getScope();
+ // TODO: Handle blocks from another file due to #line, #include, etc.
+ if (isa<DILexicalBlockFile>(Scope) || SP != getDISubprogram(Scope))
continue;
GCOVLines &Lines = Block.getFile(Filename);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 15628d32280d..2b88dd08d88b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -424,7 +424,7 @@ static Decomposition decompose(Value *V,
return MergeResults(Op0, Op1, IsSigned);
ConstantInt *CI;
- if (match(V, m_NSWMul(m_Value(Op0), m_ConstantInt(CI)))) {
+ if (match(V, m_NSWMul(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) {
auto Result = decompose(Op0, Preconditions, IsSigned, DL);
Result.mul(CI->getSExtValue());
return Result;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 24390f1b54f6..5b8f1b00dc03 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1269,6 +1269,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
if (IsLoadCSE) {
LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
combineMetadataForCSE(NLoadI, LoadI, false);
+ LVI->forgetValue(NLoadI);
};
// If the returned value is the load itself, replace with poison. This can
@@ -1461,6 +1462,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
for (LoadInst *PredLoadI : CSELoads) {
combineMetadataForCSE(PredLoadI, LoadI, true);
+ LVI->forgetValue(PredLoadI);
}
LoadI->replaceAllUsesWith(PN);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 68642a01b37c..00937e0d734a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -69,7 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
-STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
namespace {
@@ -731,23 +730,6 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
return true;
}
- // If this is a load-store pair from a stack slot to a stack slot, we
- // might be able to perform the stack-move optimization just as we do for
- // memcpys from an alloca to an alloca.
- if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
- if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
- if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
- DL.getTypeStoreSize(T), BAA)) {
- // Avoid invalidating the iterator.
- BBI = SI->getNextNonDebugInstruction()->getIterator();
- eraseInstruction(SI);
- eraseInstruction(LI);
- ++NumMemCpyInstr;
- return true;
- }
- }
- }
-
return false;
}
@@ -1426,217 +1408,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
-// Attempts to optimize the pattern whereby memory is copied from an alloca to
-// another alloca, where the two allocas don't have conflicting mod/ref. If
-// successful, the two allocas can be merged into one and the transfer can be
-// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
-// move operations in that language.
-//
-// Once we determine that the optimization is safe to perform, we replace all
-// uses of the destination alloca with the source alloca. We also "shrink wrap"
-// the lifetime markers of the single merged alloca to before the first use
-// and after the last use. Note that the "shrink wrapping" procedure is a safe
-// transformation only because we restrict the scope of this optimization to
-// allocas that aren't captured.
-bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
- AllocaInst *DestAlloca,
- AllocaInst *SrcAlloca, uint64_t Size,
- BatchAAResults &BAA) {
- LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
- << *Store << "\n");
-
- // Make sure the two allocas are in the same address space.
- if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
- LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
- return false;
- }
-
- // 1. Check that copy is full. Calculate the static size of the allocas to be
- // merged, bail out if we can't.
- const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
- std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
- if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
- LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
- return false;
- }
- std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
- if (!DestSize || DestSize->isScalable() ||
- Size != DestSize->getFixedValue()) {
- LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
- return false;
- }
-
- // 2-1. Check that src and dest are static allocas, which are not affected by
- // stacksave/stackrestore.
- if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
- SrcAlloca->getParent() != Load->getParent() ||
- SrcAlloca->getParent() != Store->getParent())
- return false;
-
- // 2-2. Check that src and dest are never captured, unescaped allocas. Also
- // collect lifetime markers first/last users in order to shrink wrap the
- // lifetimes, and instructions with noalias metadata to remove them.
-
- SmallVector<Instruction *, 4> LifetimeMarkers;
- Instruction *FirstUser = nullptr, *LastUser = nullptr;
- SmallSet<Instruction *, 4> NoAliasInstrs;
-
- // Recursively track the user and check whether modified alias exist.
- auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
- bool CanBeNull, CanBeFreed;
- return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
- };
-
- auto CaptureTrackingWithModRef =
- [&](Instruction *AI,
- function_ref<bool(Instruction *)> ModRefCallback) -> bool {
- SmallVector<Instruction *, 8> Worklist;
- Worklist.push_back(AI);
- unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
- Worklist.reserve(MaxUsesToExplore);
- SmallSet<const Use *, 20> Visited;
- while (!Worklist.empty()) {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
- for (const Use &U : I->uses()) {
- if (Visited.size() >= MaxUsesToExplore) {
- LLVM_DEBUG(
- dbgs()
- << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
- return false;
- }
- if (!Visited.insert(&U).second)
- continue;
- switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
- case UseCaptureKind::MAY_CAPTURE:
- return false;
- case UseCaptureKind::PASSTHROUGH:
- // Instructions cannot have non-instruction users.
- Worklist.push_back(cast<Instruction>(U.getUser()));
- continue;
- case UseCaptureKind::NO_CAPTURE: {
- auto *UI = cast<Instruction>(U.getUser());
- if (DestAlloca->getParent() != UI->getParent())
- return false;
- if (!FirstUser || UI->comesBefore(FirstUser))
- FirstUser = UI;
- if (!LastUser || LastUser->comesBefore(UI))
- LastUser = UI;
- if (UI->isLifetimeStartOrEnd()) {
- // We note the locations of these intrinsic calls so that we can
- // delete them later if the optimization succeeds, this is safe
- // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
- // conceptually fill all the bytes of the alloca with an undefined
- // value.
- int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
- if (Size < 0 || Size == DestSize) {
- LifetimeMarkers.push_back(UI);
- continue;
- }
- }
- if (UI->hasMetadata(LLVMContext::MD_noalias))
- NoAliasInstrs.insert(UI);
- if (!ModRefCallback(UI))
- return false;
- }
- }
- }
- }
- return true;
- };
-
- // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
- // from the alloca to the Store.
- ModRefInfo DestModRef = ModRefInfo::NoModRef;
- MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
- auto DestModRefCallback = [&](Instruction *UI) -> bool {
- // We don't care about the store itself.
- if (UI == Store)
- return true;
- ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
- // FIXME: For multi-BB cases, we need to see reachability from it to
- // store.
- // Bailout if Dest may have any ModRef before Store.
- if (UI->comesBefore(Store) && isModOrRefSet(Res))
- return false;
- DestModRef |= BAA.getModRefInfo(UI, DestLoc);
-
- return true;
- };
-
- if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
- return false;
-
- // 3. Check that, from after the Load to the end of the BB,
- // 3-1. if the dest has any Mod, src has no Ref, and
- // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
- MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
-
- auto SrcModRefCallback = [&](Instruction *UI) -> bool {
- // Any ModRef before Load doesn't matter, also Load and Store can be
- // ignored.
- if (UI->comesBefore(Load) || UI == Load || UI == Store)
- return true;
- ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
- if ((isModSet(DestModRef) && isRefSet(Res)) ||
- (isRefSet(DestModRef) && isModSet(Res)))
- return false;
-
- return true;
- };
-
- if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
- return false;
-
- // We can do the transformation. First, align the allocas appropriately.
- SrcAlloca->setAlignment(
- std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
-
- // Merge the two allocas.
- DestAlloca->replaceAllUsesWith(SrcAlloca);
- eraseInstruction(DestAlloca);
-
- // Drop metadata on the source alloca.
- SrcAlloca->dropUnknownNonDebugMetadata();
-
- // Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exists.
- if (!LifetimeMarkers.empty()) {
- LLVMContext &C = SrcAlloca->getContext();
- IRBuilder<> Builder(C);
-
- ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
- // Create a new lifetime start marker before the first user of src or alloca
- // users.
- Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
- Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
-
- // Create a new lifetime end marker after the last user of src or alloca
- // users.
- // FIXME: If the last user is the terminator for the bb, we can insert
- // lifetime.end marker to the immidiate post-dominator, but currently do
- // nothing.
- if (!LastUser->isTerminator()) {
- Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
- Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
- }
-
- // Remove all other lifetime markers.
- for (Instruction *I : LifetimeMarkers)
- eraseInstruction(I);
- }
-
- // As this transformation can cause memory accesses that didn't previously
- // alias to begin to alias one another, we remove !noalias metadata from any
- // uses of either alloca. This is conservative, but more precision doesn't
- // seem worthwhile right now.
- for (Instruction *I : NoAliasInstrs)
- I->setMetadata(LLVMContext::MD_noalias, nullptr);
-
- LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
- NumStackMove++;
- return true;
-}
-
/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
@@ -1693,14 +1464,13 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
AnyClobber, MemoryLocation::getForSource(M), BAA);
- // There are five possible optimizations we can do for memcpy:
+ // There are four possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
// c) memcpy from freshly alloca'd space or space that has just started
// its lifetime copies undefined data, and we can therefore eliminate
// the memcpy in favor of the data that was already at the destination.
// d) memcpy from a just-memset'd source can be turned into memset.
- // e) elimination of memcpy via stack-move optimization.
if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
if (Instruction *MI = MD->getMemoryInst()) {
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
@@ -1719,8 +1489,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
if (auto *MDep = dyn_cast<MemCpyInst>(MI))
- if (processMemCpyMemCpyDependence(M, MDep, BAA))
- return true;
+ return processMemCpyMemCpyDependence(M, MDep, BAA);
if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1739,27 +1508,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
- // If the transfer is from a stack slot to a stack slot, then we may be able
- // to perform the stack-move optimization. See the comments in
- // performStackMoveOptzn() for more details.
- auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
- if (!DestAlloca)
- return false;
- auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
- if (!SrcAlloca)
- return false;
- ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
- if (Len == nullptr)
- return false;
- if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
- BAA)) {
- // Avoid invalidating the iterator.
- BBI = M->getNextNonDebugInstruction()->getIterator();
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
-
return false;
}
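The removed performStackMoveOptzn targeted full-size copies between two non-escaping static allocas, merging the two stack slots and shrink-wrapping the lifetime markers; as its comments note, the pattern is common in code that lowers by-value moves. A hypothetical source-level shape of the pattern it looked for (illustrative C++ only; this change removes the optimization itself):

// Hypothetical pattern: Tmp and P are both local stack objects, neither
// address escapes, and the whole object is copied. The removed optimization
// could merge the two allocas and delete the copy.
struct Payload {
  char Bytes[64];
};

int consume(Payload P) {
  Payload Tmp = P;     // full-size copy between two stack slots
  return Tmp.Bytes[0]; // only the merged slot would ever be read
}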
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 4f1350e4ebb9..2031e70bee1d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -675,6 +675,12 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
if (CI->isByValArgument(I)) {
copyLocalTempOfByValueOperandIntoArguments(CI, I);
+ // When eliminating a tail call, we modify the values of the arguments.
+ // Therefore, if the byval parameter has a readonly attribute, we have to
+ // remove it. It is safe because, from the perspective of a caller, the
+ // byval parameter is always treated as "readonly," even if the readonly
+ // attribute is removed.
+ F.removeParamAttr(I, Attribute::ReadOnly);
ArgumentPHIs[I]->addIncoming(F.getArg(I), BB);
} else
ArgumentPHIs[I]->addIncoming(CI->getArgOperand(I), BB);
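The added comment explains that TRE rewrites the argument values in place in the generated loop, so a readonly attribute on a byval parameter must be dropped even though callers still see byval copy semantics. A hypothetical shape of a function where this matters (illustrative C++; whether the parameter is actually passed byval depends on the target ABI):

// Hypothetical example: Big is large enough that many ABIs pass it byval.
// After tail-recursion elimination the recursive call becomes a loop that
// overwrites the parameter slot on each iteration, so the slot cannot be
// marked readonly, even though the caller's copy is never modified.
struct Big {
  int Data[16];
};

int sumDown(Big B, int Acc) {
  if (B.Data[0] <= 0)
    return Acc;
  Acc += B.Data[0];
  B.Data[0] -= 1;
  return sumDown(B, Acc); // tail call turned into a loop by TRE
}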
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5b0951252c07..3ad97613fe7a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -227,9 +227,21 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
return ConstantInt::get(RetTy, Result);
}
+static bool isOnlyUsedInComparisonWithZero(Value *V) {
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
const DataLayout &DL) {
- if (!isOnlyUsedInZeroComparison(CI))
+ if (!isOnlyUsedInComparisonWithZero(CI))
return false;
if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
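The reintroduced helper walks the users of the call and only accepts icmp instructions whose other operand is the null value, i.e. the result is used solely to test against zero. A hypothetical source pattern that satisfies the check (illustrative only; the later memcmp conversion additionally requires the non-constant pointer to be known dereferenceable for the full length):

// Hypothetical example: the strcmp result is only compared with zero, so only
// (in)equality is observed and canTransformToMemCmp may consider the call.
#include <cstring>

bool isAbc(const char *S) {
  return std::strcmp(S, "abc") == 0; // the sole use of the result is "== 0"
}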
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d7e40e8ef978..b603bbe55dc9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3781,10 +3781,44 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
// the incoming edges.
VPBasicBlock *Header =
State.Plan->getVectorLoopRegion()->getEntryBasicBlock();
+
+ // Gather all VPReductionPHIRecipes and sort them so that intermediate
+ // stores sunk outside of the loop keep the same order as they had in the
+ // original loop.
+ SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
for (VPRecipeBase &R : Header->phis()) {
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
- fixReduction(ReductionPhi, State);
- else if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
+ ReductionPHIList.emplace_back(ReductionPhi);
+ }
+ stable_sort(ReductionPHIList, [this](const VPReductionPHIRecipe *R1,
+ const VPReductionPHIRecipe *R2) {
+ auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore;
+ auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore;
+
+ // If neither of the recipes has an intermediate store, keep the order the
+ // same.
+ if (!IS1 && !IS2)
+ return false;
+
+ // If only one of the recipes has an intermediate store, then move it
+ // towards the beginning of the list.
+ if (IS1 && !IS2)
+ return true;
+
+ if (!IS1 && IS2)
+ return false;
+
+ // If both recipes have an intermediate store, then the recipe with the
+ // later store should be processed earlier. So it should go to the beginning
+ // of the list.
+ return DT->dominates(IS2, IS1);
+ });
+
+ for (VPReductionPHIRecipe *ReductionPhi : ReductionPHIList)
+ fixReduction(ReductionPhi, State);
+
+ for (VPRecipeBase &R : Header->phis()) {
+ if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
fixFixedOrderRecurrence(FOR, State);
}
}
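The comparator above orders recipes with intermediate stores ahead of those without, and among stored reductions places the one whose store comes later in the original loop first, so that once the stores are sunk out of the loop they end up in their original order. A standalone sketch of the same ordering rule over hypothetical data (not the VPlan types; an integer position stands in for dominance):

// Minimal sketch: StorePos < 0 means "no intermediate store"; otherwise it is
// the store's position in the original loop, with larger meaning later.
#include <algorithm>
#include <vector>

struct Red {
  int StorePos;
};

void sortReductions(std::vector<Red> &Rs) {
  std::stable_sort(Rs.begin(), Rs.end(), [](const Red &A, const Red &B) {
    if (A.StorePos < 0 && B.StorePos < 0)
      return false; // neither has a store: keep relative order
    if (A.StorePos >= 0 && B.StorePos < 0)
      return true;  // stored reductions move toward the front
    if (A.StorePos < 0 && B.StorePos >= 0)
      return false;
    return A.StorePos > B.StorePos; // later store is processed earlier
  });
}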