Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp  46
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp  196
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h  7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp  71
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp  58
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp  64
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp  60
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  63
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  146
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h  30
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp  65
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h  1
17 files changed, 534 insertions, 308 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index f37b4dc938d3..529f7309a1a2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -2951,9 +2951,11 @@ void coro::salvageDebugInfo(
// dbg.declare does.
if (isa<DbgDeclareInst>(DVI)) {
std::optional<BasicBlock::iterator> InsertPt;
- if (auto *I = dyn_cast<Instruction>(Storage))
+ if (auto *I = dyn_cast<Instruction>(Storage)) {
InsertPt = I->getInsertionPointAfterDef();
- else if (isa<Argument>(Storage))
+ if (!OptimizeFrame && I->getDebugLoc())
+ DVI.setDebugLoc(I->getDebugLoc());
+ } else if (isa<Argument>(Storage))
InsertPt = F->getEntryBlock().begin();
if (InsertPt)
DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 7c277518b21d..7ebf265e17ba 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -76,6 +76,7 @@ STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
+STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
@@ -1279,6 +1280,45 @@ static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
}
}
+/// Deduce noundef attributes for the SCC.
+static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
+ // Check each function in turn, determining which functions return noundef
+ // values.
+ for (Function *F : SCCNodes) {
+ // Already noundef.
+ if (F->getAttributes().hasRetAttr(Attribute::NoUndef))
+ continue;
+
+ // We can infer and propagate function attributes only when we know that the
+ // definition we'll get at link time is *exactly* the definition we see now.
+ // For more details, see GlobalValue::mayBeDerefined.
+ if (!F->hasExactDefinition())
+ return;
+
+    // MemorySanitizer assumes that the definition and declaration of a
+    // function will be consistent. A function with the sanitize_memory
+    // attribute should be skipped during inference.
+ if (F->hasFnAttribute(Attribute::SanitizeMemory))
+ continue;
+
+ if (F->getReturnType()->isVoidTy())
+ continue;
+
+ if (all_of(*F, [](BasicBlock &BB) {
+ if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
+ // TODO: perform context-sensitive analysis?
+ return isGuaranteedNotToBeUndefOrPoison(Ret->getReturnValue());
+ }
+ return true;
+ })) {
+ F->addRetAttr(Attribute::NoUndef);
+ ++NumNoUndefReturn;
+ Changed.insert(F);
+ }
+ }
+}
+
namespace {
/// Collects a set of attribute inference requests and performs them all in one
@@ -1629,7 +1669,10 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
for (auto &I : BB.instructionsWithoutDebug())
if (auto *CB = dyn_cast<CallBase>(&I)) {
Function *Callee = CB->getCalledFunction();
- if (!Callee || Callee == F || !Callee->doesNotRecurse())
+ if (!Callee || Callee == F ||
+ (!Callee->doesNotRecurse() &&
+ !(Callee->isDeclaration() &&
+ Callee->hasFnAttribute(Attribute::NoCallback))))
// Function calls a potentially recursive function.
return;
}
@@ -1785,6 +1828,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
inferConvergent(Nodes.SCCNodes, Changed);
addNoReturnAttrs(Nodes.SCCNodes, Changed);
addWillReturn(Nodes.SCCNodes, Changed);
+ addNoUndefAttrs(Nodes.SCCNodes, Changed);
// If we have no external nodes participating in the SCC, we can deduce some
// more precise attributes as well.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 719a2678fc18..556fde37efeb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1685,8 +1685,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
assert(NotLHS != nullptr && NotRHS != nullptr &&
"isFreeToInvert desynced with getFreelyInverted");
Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS);
- return BinaryOperator::CreateSub(ConstantInt::get(RHS->getType(), -2),
- LHSPlusRHS);
+ return BinaryOperator::CreateSub(
+ ConstantInt::getSigned(RHS->getType(), -2), LHSPlusRHS);
}
}
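
Note: the one-line change above matters for wide integer types. ConstantInt::get takes the constant as a uint64_t and zero-extends, so for types wider than 64 bits the -2 would come out wrong; getSigned sign-extends it to the full bit width. The fold itself rests on the identity ~a + ~b == -2 - (a + b); a standalone exhaustive check over 8-bit values (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Exhaustive 8-bit check of the identity behind the visitAdd fold:
  //   ~a + ~b == -2 - (a + b)   (all arithmetic modulo 2^8)
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      uint8_t lhs = (uint8_t)(~a) + (uint8_t)(~b);
      uint8_t rhs = (uint8_t)(-2 - (a + b));
      assert(lhs == rhs);
    }
  return 0;
}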
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 63b1e0f64a88..c03f50d75814 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3513,9 +3513,13 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(Op0, C);
// ((B | C) & A) | B -> B | (A & C)
- if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
+ if (match(Op0, m_c_And(m_c_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C));
+ // B | ((B | C) & A) -> B | (A & C)
+ if (match(Op1, m_c_And(m_c_Or(m_Specific(Op0), m_Value(C)), m_Value(A))))
+ return BinaryOperator::CreateOr(Op0, Builder.CreateAnd(A, C));
+
if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this))
return DeMorgan;
@@ -3872,6 +3876,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
}
}
+ // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2
+ if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C1)))) &&
+ match(Op1, m_APInt(C2))) {
+ KnownBits KnownX = computeKnownBits(X, /*Depth*/ 0, &I);
+ if ((KnownX.One & *C2) == *C2)
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
+ }
+
return nullptr;
}
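
Note: the second hunk replaces an and/or pair with a single and when computeKnownBits proves every bit of C2 is already one in X. That side condition is exactly what makes the identity hold; a brute-force check over 8-bit values (a sketch, not part of the patch):

#include <cassert>

int main() {
  // Verify (x & c1) | c2 == x & (c1 | c2) whenever every bit of c2 is
  // already set in x -- the known-bits condition the new fold checks.
  for (int x = 0; x < 256; ++x)
    for (int c1 = 0; c1 < 256; ++c1)
      for (int c2 = 0; c2 < 256; ++c2) {
        if ((x & c2) != c2)
          continue; // c2's bits are not all known-one in x.
        assert(((x & c1) | c2) == (x & (c1 | c2)));
      }
  return 0;
}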
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 3b7fe7fa2266..43d4496571be 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3850,6 +3850,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (Callee->hasFnAttribute("thunk"))
return false;
+  // If this is a call to a naked function, the assembly might be using an
+  // argument or otherwise relying on the frame layout, so a rewritten
+  // function prototype would mismatch.
+ if (Callee->hasFnAttribute(Attribute::Naked))
+ return false;
+
// If this is a musttail call, the callee's prototype must match the caller's
// prototype with the exception of pointee types. The code below doesn't
// implement that, so we can't do this transform.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 289976718e52..3875e59c3ede 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -111,8 +111,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
- GV->getValueType() != GEP->getSourceElementType() ||
- !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() ||
+ !GV->hasDefinitiveInitializer())
return nullptr;
Constant *Init = GV->getInitializer();
@@ -128,8 +128,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// the simple index into a single-dimensional array.
//
// Require: GEP GV, 0, i {{, constant indices}}
- if (GEP->getNumOperands() < 3 ||
- !isa<ConstantInt>(GEP->getOperand(1)) ||
+ if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
!cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
isa<Constant>(GEP->getOperand(2)))
return nullptr;
@@ -142,15 +141,18 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Type *EltTy = Init->getType()->getArrayElementType();
for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!Idx) return nullptr; // Variable index.
+ if (!Idx)
+ return nullptr; // Variable index.
uint64_t IdxVal = Idx->getZExtValue();
- if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
+ if ((unsigned)IdxVal != IdxVal)
+ return nullptr; // Too large array index.
if (StructType *STy = dyn_cast<StructType>(EltTy))
EltTy = STy->getElementType(IdxVal);
else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
- if (IdxVal >= ATy->getNumElements()) return nullptr;
+ if (IdxVal >= ATy->getNumElements())
+ return nullptr;
EltTy = ATy->getElementType();
} else {
return nullptr; // Unknown type.
@@ -191,7 +193,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
Constant *Elt = Init->getAggregateElement(i);
- if (!Elt) return nullptr;
+ if (!Elt)
+ return nullptr;
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty()) {
@@ -214,16 +217,17 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
// undef in the middle of the range.
- if (TrueRangeEnd == (int)i-1)
+ if (TrueRangeEnd == (int)i - 1)
TrueRangeEnd = i;
- if (FalseRangeEnd == (int)i-1)
+ if (FalseRangeEnd == (int)i - 1)
FalseRangeEnd = i;
continue;
}
// If we can't compute the result for any of the elements, we have to give
// up evaluating the entire conditional.
- if (!isa<ConstantInt>(C)) return nullptr;
+ if (!isa<ConstantInt>(C))
+ return nullptr;
// Otherwise, we know if the comparison is true or false for this element,
// update our state machines.
@@ -233,7 +237,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
if (IsTrueForElt) {
// Update the TrueElement state machine.
if (FirstTrueElement == Undefined)
- FirstTrueElement = TrueRangeEnd = i; // First true element.
+ FirstTrueElement = TrueRangeEnd = i; // First true element.
else {
// Update double-compare state machine.
if (SecondTrueElement == Undefined)
@@ -242,7 +246,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
SecondTrueElement = Overdefined;
// Update range state machine.
- if (TrueRangeEnd == (int)i-1)
+ if (TrueRangeEnd == (int)i - 1)
TrueRangeEnd = i;
else
TrueRangeEnd = Overdefined;
@@ -259,7 +263,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
SecondFalseElement = Overdefined;
// Update range state machine.
- if (FalseRangeEnd == (int)i-1)
+ if (FalseRangeEnd == (int)i - 1)
FalseRangeEnd = i;
else
FalseRangeEnd = Overdefined;
@@ -348,7 +352,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// False for two elements -> 'i != 47 & i != 72'.
Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
- Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+ Value *SecondFalseIdx =
+ ConstantInt::get(Idx->getType(), SecondFalseElement);
Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
return BinaryOperator::CreateAnd(C1, C2);
}
@@ -365,8 +370,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Idx = Builder.CreateAdd(Idx, Offs);
}
- Value *End = ConstantInt::get(Idx->getType(),
- TrueRangeEnd-FirstTrueElement+1);
+ Value *End =
+ ConstantInt::get(Idx->getType(), TrueRangeEnd - FirstTrueElement + 1);
return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
}
@@ -380,8 +385,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Idx = Builder.CreateAdd(Idx, Offs);
}
- Value *End = ConstantInt::get(Idx->getType(),
- FalseRangeEnd-FirstFalseElement);
+ Value *End =
+ ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement);
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
}
@@ -4624,27 +4629,35 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
}
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
- if (BO0 && isa<OverflowingBinaryOperator>(BO0))
- NoOp0WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
- if (BO1 && isa<OverflowingBinaryOperator>(BO1))
- NoOp1WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
-
+ bool Op0HasNUW = false, Op1HasNUW = false;
+ bool Op0HasNSW = false, Op1HasNSW = false;
// Analyze the case when either Op0 or Op1 is an add instruction.
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred,
+ bool &HasNSW, bool &HasNUW) -> bool {
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ HasNUW = BO.hasNoUnsignedWrap();
+ HasNSW = BO.hasNoSignedWrap();
+ return ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && HasNUW) ||
+ (CmpInst::isSigned(Pred) && HasNSW);
+ } else if (BO.getOpcode() == Instruction::Or) {
+ HasNUW = true;
+ HasNSW = true;
+ return true;
+ } else {
+ return false;
+ }
+ };
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- if (BO0 && BO0->getOpcode() == Instruction::Add) {
- A = BO0->getOperand(0);
- B = BO0->getOperand(1);
+
+ if (BO0) {
+ match(BO0, m_AddLike(m_Value(A), m_Value(B)));
+ NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW);
}
- if (BO1 && BO1->getOpcode() == Instruction::Add) {
- C = BO1->getOperand(0);
- D = BO1->getOperand(1);
+ if (BO1) {
+ match(BO1, m_AddLike(m_Value(C), m_Value(D)));
+ NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW);
}
// icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
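
Note: hasNoWrapProblem now also accepts or instructions (matched alongside add via m_AddLike), on the basis that an or whose operands share no set bits is exactly an add, and an add that never carries can wrap neither as unsigned nor as signed. A small demonstration of that equivalence for disjoint 8-bit operands (illustrative only):

#include <cassert>

int main() {
  // For operands with disjoint set bits, `or` coincides with `add`:
  // no bit position receives two ones, so the sum produces no carries
  // and cannot wrap (unsigned or signed).
  for (int x = 0; x < 256; ++x)
    for (int y = 0; y < 256; ++y) {
      if ((x & y) != 0)
        continue; // Operands share a bit; or and add may differ.
      assert((x | y) == x + y); // Equal values...
      assert(x + y <= 255);     // ...and no unsigned wrap in 8 bits.
    }
  return 0;
}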
@@ -4764,17 +4777,15 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
APInt AP2Abs = AP2->abs();
if (AP1Abs.uge(AP2Abs)) {
APInt Diff = *AP1 - *AP2;
- bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1);
- bool HasNSW = BO0->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW);
return new ICmpInst(Pred, NewAdd, C);
} else {
APInt Diff = *AP2 - *AP1;
- bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2);
- bool HasNSW = BO1->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ C, C3, "", Op1HasNUW && Diff.ule(*AP2), Op1HasNSW);
return new ICmpInst(Pred, A, NewAdd);
}
}
@@ -4868,16 +4879,14 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// if Z != 0 and nsw(X * Z) and nsw(Y * Z)
// X * Z eq/ne Y * Z -> X eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() &&
- BO1->hasNoSignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW)
return new ICmpInst(Pred, X, Y);
} else
NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// If Z != 0 and nuw(X * Z) and nuw(Y * Z)
// X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() &&
- BO1->hasNoUnsignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW)
return new ICmpInst(Pred, X, Y);
}
}
@@ -4966,7 +4975,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
case Instruction::SDiv:
- if (!I.isEquality() || !BO0->isExact() || !BO1->isExact())
+ if (!(I.isEquality() || match(BO0->getOperand(1), m_NonNegative())) ||
+ !BO0->isExact() || !BO1->isExact())
break;
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
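
Note: the SDiv case previously fired only for equality compares; it now also handles relational predicates when the shared divisor is known non-negative, since exact division by a positive value preserves signed order as well as equality. An exhaustive 8-bit sanity check (standalone sketch; d starts at 1 because division by zero is immediate UB anyway):

#include <cassert>

int main() {
  // For exact signed divisions by the same positive divisor, comparing
  // the quotients is the same as comparing the numerators, so
  //   icmp (sdiv exact a, d), (sdiv exact b, d) -> icmp a, b.
  for (int a = -128; a < 128; ++a)
    for (int b = -128; b < 128; ++b)
      for (int d = 1; d < 128; ++d) {
        if (a % d != 0 || b % d != 0)
          continue; // Not exact; the fold does not apply.
        assert((a / d < b / d) == (a < b));
        assert((a / d == b / d) == (a == b));
      }
  return 0;
}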
@@ -4976,8 +4986,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
case Instruction::Shl: {
- bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
- bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ bool NUW = Op0HasNUW && Op1HasNUW;
+ bool NSW = Op0HasNSW && Op1HasNSW;
if (!NUW && !NSW)
break;
if (!NSW && I.isSigned())
@@ -5029,10 +5039,10 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
}
/// Fold icmp Pred min|max(X, Y), Z.
-Instruction *
-InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I,
- MinMaxIntrinsic *MinMax, Value *Z,
- ICmpInst::Predicate Pred) {
+Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I,
+ MinMaxIntrinsic *MinMax,
+ Value *Z,
+ ICmpInst::Predicate Pred) {
Value *X = MinMax->getLHS();
Value *Y = MinMax->getRHS();
if (ICmpInst::isSigned(Pred) && !MinMax->isSigned())
@@ -5161,24 +5171,6 @@ InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I,
return nullptr;
}
-Instruction *InstCombinerImpl::foldICmpWithMinMax(ICmpInst &Cmp) {
- ICmpInst::Predicate Pred = Cmp.getPredicate();
- Value *Lhs = Cmp.getOperand(0);
- Value *Rhs = Cmp.getOperand(1);
-
- if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Lhs)) {
- if (Instruction *Res = foldICmpWithMinMaxImpl(Cmp, MinMax, Rhs, Pred))
- return Res;
- }
-
- if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Rhs)) {
- if (Instruction *Res = foldICmpWithMinMaxImpl(
- Cmp, MinMax, Lhs, ICmpInst::getSwappedPredicate(Pred)))
- return Res;
- }
-
- return nullptr;
-}
// Canonicalize checking for a power-of-2-or-zero value:
static Instruction *foldICmpPow2Test(ICmpInst &I,
@@ -6843,6 +6835,34 @@ static Instruction *foldReductionIdiom(ICmpInst &I,
return nullptr;
}
+// This helper will be called with icmp operands in both orders.
+Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred,
+ Value *Op0, Value *Op1,
+ ICmpInst &CxtI) {
+ // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'.
+ if (auto *GEP = dyn_cast<GEPOperator>(Op0))
+ if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI))
+ return NI;
+
+ if (auto *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI))
+ return NI;
+
+ if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0))
+ if (Instruction *Res = foldICmpWithMinMax(CxtI, MinMax, Op1, Pred))
+ return Res;
+
+ {
+ Value *X;
+ const APInt *C;
+ // icmp X+Cst, X
+ if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
+ return foldICmpAddOpConst(X, *C, Pred);
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -6966,20 +6986,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
return Res;
- // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'.
- if (auto *GEP = dyn_cast<GEPOperator>(Op0))
- if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
- return NI;
- if (auto *GEP = dyn_cast<GEPOperator>(Op1))
- if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I))
- return NI;
-
- if (auto *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *NI = foldSelectICmp(I.getPredicate(), SI, Op1, I))
- return NI;
- if (auto *SI = dyn_cast<SelectInst>(Op1))
- if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I))
- return NI;
+ if (Instruction *Res = foldICmpCommutative(I.getPredicate(), Op0, Op1, I))
+ return Res;
+ if (Instruction *Res =
+ foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I))
+ return Res;
// In case of a comparison with two select instructions having the same
// condition, check whether one of the resulting branches can be simplified.
@@ -7030,9 +7041,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *R = foldICmpWithCastOp(I))
return R;
- if (Instruction *Res = foldICmpWithMinMax(I))
- return Res;
-
{
Value *X, *Y;
// Transform (X & ~Y) == 0 --> (X & Y) != 0
@@ -7134,18 +7142,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
!ACXI->isWeak())
return ExtractValueInst::Create(ACXI, 1);
- {
- Value *X;
- const APInt *C;
- // icmp X+Cst, X
- if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
- return foldICmpAddOpConst(X, *C, I.getPredicate());
-
- // icmp X, X+Cst
- if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
- return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
- }
-
if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
return Res;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9e76a0cf17b1..bdaf7550b4b4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -648,9 +648,8 @@ public:
Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
const APInt &C);
Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ);
- Instruction *foldICmpWithMinMaxImpl(Instruction &I, MinMaxIntrinsic *MinMax,
- Value *Z, ICmpInst::Predicate Pred);
- Instruction *foldICmpWithMinMax(ICmpInst &Cmp);
+ Instruction *foldICmpWithMinMax(Instruction &I, MinMaxIntrinsic *MinMax,
+ Value *Z, ICmpInst::Predicate Pred);
Instruction *foldICmpEquality(ICmpInst &Cmp);
Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
Instruction *foldSignBitTest(ICmpInst &I);
@@ -708,6 +707,8 @@ public:
const APInt &C);
Instruction *foldICmpBitCast(ICmpInst &Cmp);
Instruction *foldICmpWithTrunc(ICmpInst &Cmp);
+ Instruction *foldICmpCommutative(ICmpInst::Predicate Pred, Value *Op0,
+ Value *Op1, ICmpInst &CxtI);
// Helpers of visitSelectInst().
Instruction *foldSelectOfBools(SelectInst &SI);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 20bf00344b14..ab55f235920a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1171,14 +1171,15 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
return nullptr;
}
-static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
- InstCombinerImpl &IC) {
+static Value *canonicalizeSPF(ICmpInst &Cmp, Value *TrueVal, Value *FalseVal,
+ InstCombinerImpl &IC) {
Value *LHS, *RHS;
// TODO: What to do with pointer min/max patterns?
- if (!Sel.getType()->isIntOrIntVectorTy())
+ if (!TrueVal->getType()->isIntOrIntVectorTy())
return nullptr;
- SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor;
+ SelectPatternFlavor SPF =
+ matchDecomposedSelectPattern(&Cmp, TrueVal, FalseVal, LHS, RHS).Flavor;
if (SPF == SelectPatternFlavor::SPF_ABS ||
SPF == SelectPatternFlavor::SPF_NABS) {
if (!Cmp.hasOneUse() && !RHS->hasOneUse())
@@ -1188,13 +1189,13 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS &&
match(RHS, m_NSWNeg(m_Specific(LHS)));
Constant *IntMinIsPoisonC =
- ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison);
+ ConstantInt::get(Type::getInt1Ty(Cmp.getContext()), IntMinIsPoison);
Instruction *Abs =
IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC);
if (SPF == SelectPatternFlavor::SPF_NABS)
- return BinaryOperator::CreateNeg(Abs); // Always without NSW flag!
- return IC.replaceInstUsesWith(Sel, Abs);
+ return IC.Builder.CreateNeg(Abs); // Always without NSW flag!
+ return Abs;
}
if (SelectPatternResult::isMinOrMax(SPF)) {
@@ -1215,8 +1216,7 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
default:
llvm_unreachable("Unexpected SPF");
}
- return IC.replaceInstUsesWith(
- Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS));
+ return IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS);
}
return nullptr;
@@ -1677,8 +1677,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI))
return NewSel;
- if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this))
- return NewSPF;
+ if (Value *V =
+ canonicalizeSPF(*ICI, SI.getTrueValue(), SI.getFalseValue(), *this))
+ return replaceInstUsesWith(SI, V);
if (Value *V = foldSelectInstWithICmpConst(SI, ICI, Builder))
return replaceInstUsesWith(SI, V);
@@ -2363,6 +2364,9 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel,
Value *FVal = Sel.getFalseValue();
Type *SelType = Sel.getType();
+ if (ICmpInst::makeCmpResultType(TVal->getType()) != Cond->getType())
+ return nullptr;
+
// Match select ?, TC, FC where the constants are equal but negated.
// TODO: Generalize to handle a negated variable operand?
const APFloat *TC, *FC;
@@ -3790,5 +3794,50 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Instruction *I = foldBitCeil(SI, Builder))
return I;
+ // Fold:
+ // (select A && B, T, F) -> (select A, (select B, T, F), F)
+ // (select A || B, T, F) -> (select A, T, (select B, T, F))
+ // if (select B, T, F) is foldable.
+ // TODO: preserve FMF flags
+ auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A,
+ Value *B) -> Instruction * {
+ if (Value *V = simplifySelectInst(B, TrueVal, FalseVal,
+ SQ.getWithInstruction(&SI)))
+ return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V);
+
+ // Is (select B, T, F) a SPF?
+ if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) {
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(B))
+ if (Value *V = canonicalizeSPF(*Cmp, TrueVal, FalseVal, *this))
+ return SelectInst::Create(A, IsAnd ? V : TrueVal,
+ IsAnd ? FalseVal : V);
+ }
+
+ return nullptr;
+ };
+
+ Value *LHS, *RHS;
+ if (match(CondVal, m_And(m_Value(LHS), m_Value(RHS)))) {
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS))
+ return I;
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, RHS, LHS))
+ return I;
+ } else if (match(CondVal, m_Or(m_Value(LHS), m_Value(RHS)))) {
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS))
+ return I;
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, RHS, LHS))
+ return I;
+ } else {
+ // We cannot swap the operands of logical and/or.
+ // TODO: Can we swap the operands by inserting a freeze?
+ if (match(CondVal, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) {
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS))
+ return I;
+ } else if (match(CondVal, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) {
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS))
+ return I;
+ }
+ }
+
return nullptr;
}
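
Note: the FoldSelectWithAndOrCond helper added above distributes a select over an and/or condition (or its logical select form) whenever the inner select simplifies or matches a select-pattern flavor. The rewrite itself is the plain ternary identity below, checked exhaustively (trivial standalone sketch):

#include <cassert>

int main() {
  // The select-of-and/or fold rewrites
  //   (a && b) ? t : f   as   a ? (b ? t : f) : f
  //   (a || b) ? t : f   as   a ? t : (b ? t : f)
  // which is profitable only when the inner select folds away.
  const int t = 10, f = 20;
  for (int a = 0; a < 2; ++a)
    for (int b = 0; b < 2; ++b) {
      assert(((a && b) ? t : f) == (a ? (b ? t : f) : f));
      assert(((a || b) ? t : f) == (a ? t : (b ? t : f)));
    }
  return 0;
}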
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7f5a7b666903..351fc3b0174f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2469,31 +2469,43 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
DL.getIndexSizeInBits(AS)) {
uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
- bool Matched = false;
- uint64_t C;
- Value *V = nullptr;
if (TyAllocSize == 1) {
- V = GEP.getOperand(1);
- Matched = true;
- } else if (match(GEP.getOperand(1),
- m_AShr(m_Value(V), m_ConstantInt(C)))) {
- if (TyAllocSize == 1ULL << C)
- Matched = true;
- } else if (match(GEP.getOperand(1),
- m_SDiv(m_Value(V), m_ConstantInt(C)))) {
- if (TyAllocSize == C)
- Matched = true;
+ // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
+ // but only if the result pointer is only used as if it were an integer,
+ // or both point to the same underlying object (otherwise provenance is
+ // not necessarily retained).
+ Value *X = GEP.getPointerOperand();
+ Value *Y;
+ if (match(GEP.getOperand(1),
+ m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
+ GEPType == Y->getType()) {
+ bool HasSameUnderlyingObject =
+ getUnderlyingObject(X) == getUnderlyingObject(Y);
+ bool Changed = false;
+ GEP.replaceUsesWithIf(Y, [&](Use &U) {
+ bool ShouldReplace = HasSameUnderlyingObject ||
+ isa<ICmpInst>(U.getUser()) ||
+ isa<PtrToIntInst>(U.getUser());
+ Changed |= ShouldReplace;
+ return ShouldReplace;
+ });
+ return Changed ? &GEP : nullptr;
+ }
+ } else {
+ // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
+ Value *V;
+ if ((has_single_bit(TyAllocSize) &&
+ match(GEP.getOperand(1),
+ m_Exact(m_AShr(m_Value(V),
+ m_SpecificInt(countr_zero(TyAllocSize)))))) ||
+ match(GEP.getOperand(1),
+ m_Exact(m_SDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
+ GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+ Builder.getInt8Ty(), GEP.getPointerOperand(), V);
+ NewGEP->setIsInBounds(GEP.isInBounds());
+ return NewGEP;
+ }
}
-
- // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
- // only if both point to the same underlying object (otherwise provenance
- // is not necessarily retained).
- Value *Y;
- Value *X = GEP.getOperand(0);
- if (Matched &&
- match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
- getUnderlyingObject(X) == getUnderlyingObject(Y))
- return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
}
}
// We do not handle pointer-vector geps here.
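
Note: the restructured block handles two canonicalizations. For i8 GEPs, gep(X, ptrtoint(Y) - ptrtoint(X)) becomes Y outright, but only for uses where provenance is preserved (same underlying object, or icmp/ptrtoint users); for wider element types, an exact ashr/sdiv-scaled index becomes an i8 GEP on the raw byte offset. The address arithmetic behind the first case, shown within a single object in plain C++ (illustrative):

#include <cassert>
#include <cstdint>

int main() {
  // The i8 GEP canonicalization rewrites
  //   gep i8* X, (ptrtoint Y) - (ptrtoint X)  ->  Y
  // Arithmetically this is ordinary pointer math; the patch's extra
  // checks guard the provenance side, which plain arithmetic ignores.
  char buf[16];
  char *x = buf, *y = buf + 5;                // Same underlying object.
  char *g = x + ((intptr_t)y - (intptr_t)x);  // gep i8* x, (y - x)
  assert(g == y);
  return 0;
}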
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 899d7e0a11e6..06c87bd6dc37 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -366,6 +366,13 @@ struct Decomposition {
append_range(Vars, Other.Vars);
}
+ void sub(const Decomposition &Other) {
+ Decomposition Tmp = Other;
+ Tmp.mul(-1);
+ add(Tmp.Offset);
+ append_range(Vars, Tmp.Vars);
+ }
+
void mul(int64_t Factor) {
Offset = multiplyWithOverflow(Offset, Factor);
for (auto &Var : Vars)
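
Note: Decomposition models a value as a constant offset plus a sum of (coefficient, variable) terms, so the new sub really is "negate the other decomposition, then add it". A compressed standalone sketch of that representation (illustrative names; the real code multiplies with overflow checks):

#include <cstdint>
#include <string>
#include <vector>

// Hypothetical mirror of the pass's Decomposition: Offset + sum(Coeff * Var).
struct Decomp {
  int64_t Offset = 0;
  std::vector<std::pair<int64_t, std::string>> Vars; // (coefficient, variable)

  void mul(int64_t Factor) {
    Offset *= Factor; // The real pass uses multiplyWithOverflow here.
    for (auto &V : Vars)
      V.first *= Factor;
  }
  void sub(const Decomp &Other) {
    Decomp Tmp = Other; // Negate a copy, then append its terms.
    Tmp.mul(-1);
    Offset += Tmp.Offset;
    Vars.insert(Vars.end(), Tmp.Vars.begin(), Tmp.Vars.end());
  }
};

int main() {
  Decomp A{3, {{1, "x"}}}; // x + 3
  Decomp B{1, {{1, "y"}}}; // y + 1
  A.sub(B);                // x - y + 2
  return A.Offset == 2 ? 0 : 1;
}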
@@ -569,10 +576,12 @@ static Decomposition decompose(Value *V,
return Result;
}
- if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI))
- return {-1 * CI->getSExtValue(), {{1, Op0}}};
- if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
- return {0, {{1, Op0}, {-1, Op1}}};
+ if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) {
+ auto ResA = decompose(Op0, Preconditions, IsSigned, DL);
+ auto ResB = decompose(Op1, Preconditions, IsSigned, DL);
+ ResA.sub(ResB);
+ return ResA;
+ }
return {V, IsKnownNonNegative};
}
@@ -1010,22 +1019,14 @@ void State::addInfoFor(BasicBlock &BB) {
continue;
}
- if (match(&I, m_Intrinsic<Intrinsic::ssub_with_overflow>())) {
- WorkList.push_back(
- FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
- continue;
- }
-
- if (isa<MinMaxIntrinsic>(&I)) {
- WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I));
- continue;
- }
-
- Value *A, *B;
- CmpInst::Predicate Pred;
- // For now, just handle assumes with a single compare as condition.
- if (match(&I, m_Intrinsic<Intrinsic::assume>(
- m_ICmp(Pred, m_Value(A), m_Value(B))))) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
+ switch (ID) {
+ case Intrinsic::assume: {
+ Value *A, *B;
+ CmpInst::Predicate Pred;
+ if (!match(I.getOperand(0), m_ICmp(Pred, m_Value(A), m_Value(B))))
+ break;
if (GuaranteedToExecute) {
// The assume is guaranteed to execute when BB is entered, hence Cond
// holds on entry to BB.
@@ -1035,7 +1036,23 @@ void State::addInfoFor(BasicBlock &BB) {
WorkList.emplace_back(
FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I));
}
+ break;
+ }
+ // Enqueue ssub_with_overflow for simplification.
+ case Intrinsic::ssub_with_overflow:
+ WorkList.push_back(
+ FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
+ break;
+ // Enqueue the intrinsics to add extra info.
+ case Intrinsic::abs:
+ case Intrinsic::umin:
+ case Intrinsic::umax:
+ case Intrinsic::smin:
+ case Intrinsic::smax:
+ WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I));
+ break;
}
+
GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
}
@@ -1693,6 +1710,13 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
ICmpInst::Predicate Pred;
if (!CB.isConditionFact()) {
+ Value *X;
+ if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
+ // TODO: Add CB.Inst >= 0 fact.
+ AddFact(CmpInst::ICMP_SGE, CB.Inst, X);
+ continue;
+ }
+
if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
AddFact(Pred, MinMax, MinMax->getLHS());
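
Note: the new abs handling registers the fact abs(x) >= x (signed) with the constraint system. The fact holds for every input: with the int-min-is-poison flag set, the INT_MIN input is poison, and without it abs wraps back to INT_MIN, which still satisfies the comparison. An 8-bit exhaustive check (illustrative):

#include <cassert>
#include <cstdint>

int main() {
  // llvm.abs fact: abs(x) >= x under signed comparison, including the
  // wrapping case abs(-128) == -128 in i8 (with int-min-is-poison the
  // -128 input would be poison instead of wrapping).
  for (int x = -128; x < 128; ++x) {
    int8_t a = (x < 0) ? (int8_t)(-x) : (int8_t)x; // i8 abs, wraps at -128.
    assert(a >= (int8_t)x);
  }
  return 0;
}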
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index fb4d82885377..282c44563466 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -29,9 +29,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -66,19 +67,6 @@ class CanonicalizeFreezeInLoopsImpl {
ScalarEvolution &SE;
DominatorTree &DT;
- struct FrozenIndPHIInfo {
- // A freeze instruction that uses an induction phi
- FreezeInst *FI = nullptr;
- // The induction phi, step instruction, the operand idx of StepInst which is
- // a step value
- PHINode *PHI;
- BinaryOperator *StepInst;
- unsigned StepValIdx = 0;
-
- FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst)
- : PHI(PHI), StepInst(StepInst) {}
- };
-
// Can freeze instruction be pushed into operands of I?
// In order to do this, I should not create a poison after I's flags are
// stripped.
@@ -99,6 +87,46 @@ public:
} // anonymous namespace
+namespace llvm {
+
+struct FrozenIndPHIInfo {
+ // A freeze instruction that uses an induction phi
+ FreezeInst *FI = nullptr;
+ // The induction phi, step instruction, the operand idx of StepInst which is
+ // a step value
+ PHINode *PHI;
+ BinaryOperator *StepInst;
+ unsigned StepValIdx = 0;
+
+ FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst)
+ : PHI(PHI), StepInst(StepInst) {}
+
+ bool operator==(const FrozenIndPHIInfo &Other) { return FI == Other.FI; }
+};
+
+template <> struct DenseMapInfo<FrozenIndPHIInfo> {
+ static inline FrozenIndPHIInfo getEmptyKey() {
+ return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getEmptyKey(),
+ DenseMapInfo<BinaryOperator *>::getEmptyKey());
+ }
+
+ static inline FrozenIndPHIInfo getTombstoneKey() {
+ return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getTombstoneKey(),
+ DenseMapInfo<BinaryOperator *>::getTombstoneKey());
+ }
+
+ static unsigned getHashValue(const FrozenIndPHIInfo &Val) {
+ return DenseMapInfo<FreezeInst *>::getHashValue(Val.FI);
+ };
+
+ static bool isEqual(const FrozenIndPHIInfo &LHS,
+ const FrozenIndPHIInfo &RHS) {
+ return LHS.FI == RHS.FI;
+ };
+};
+
+} // end namespace llvm
+
// Given U = (value, user), replace value with freeze(value), and let
// SCEV forget user. The inserted freeze is placed in the preheader.
void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) {
@@ -126,7 +154,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() {
if (!L->isLoopSimplifyForm())
return false;
- SmallVector<FrozenIndPHIInfo, 4> Candidates;
+ SmallSetVector<FrozenIndPHIInfo, 4> Candidates;
for (auto &PHI : L->getHeader()->phis()) {
InductionDescriptor ID;
@@ -155,7 +183,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() {
if (auto *FI = dyn_cast<FreezeInst>(U)) {
LLVM_DEBUG(dbgs() << "canonfr: found: " << *FI << "\n");
Info.FI = FI;
- Candidates.push_back(Info);
+ Candidates.insert(Info);
}
};
for_each(PHI.users(), Visit);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index a758fb306982..c76cc9db16d7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -3593,8 +3593,9 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,
if (isa<ConstantInt>(C))
return createIntegerExpression(C);
- if (Ty.isFloatTy() || Ty.isDoubleTy()) {
- const APFloat &APF = cast<ConstantFP>(&C)->getValueAPF();
+ auto *FP = dyn_cast<ConstantFP>(&C);
+ if (FP && (Ty.isFloatTy() || Ty.isDoubleTy())) {
+ const APFloat &APF = FP->getValueAPF();
return DIB.createConstantValueExpression(
APF.bitcastToAPInt().getZExtValue());
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f82e161fb846..8e135d80f4f2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8174,13 +8174,20 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
bool Consecutive =
Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+ VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
+ if (Consecutive) {
+ auto *VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
+ Reverse, I->getDebugLoc());
+ Builder.getInsertBlock()->appendRecipe(VectorPtr);
+ Ptr = VectorPtr;
+ }
if (LoadInst *Load = dyn_cast<LoadInst>(I))
- return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask,
- Consecutive, Reverse);
+ return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
+ Reverse);
StoreInst *Store = cast<StoreInst>(I);
- return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0],
- Mask, Consecutive, Reverse);
+ return new VPWidenMemoryInstructionRecipe(*Store, Ptr, Operands[0], Mask,
+ Consecutive, Reverse);
}
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
@@ -9475,8 +9482,8 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
bool isMaskRequired = getMask();
if (isMaskRequired) {
- // Mask reversal is only neede for non-all-one (null) masks, as reverse of a
- // null all-one mask is a null mask.
+ // Mask reversal is only needed for non-all-one (null) masks, as reverse of
+ // a null all-one mask is a null mask.
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Mask = State.get(getMask(), Part);
if (isReverse())
@@ -9485,44 +9492,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
}
}
- const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
- // Calculate the pointer for the specific unroll-part.
- Value *PartPtr = nullptr;
-
- // Use i32 for the gep index type when the value is constant,
- // or query DataLayout for a more suitable index type otherwise.
- const DataLayout &DL =
- Builder.GetInsertBlock()->getModule()->getDataLayout();
- Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0)
- ? DL.getIndexType(PointerType::getUnqual(
- ScalarDataTy->getContext()))
- : Builder.getInt32Ty();
- bool InBounds = false;
- if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
- InBounds = gep->isInBounds();
- if (isReverse()) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
- // NumElt = -Part * RunTimeVF
- Value *NumElt =
- Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane =
- Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
- PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds);
- PartPtr =
- Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds);
- } else {
- Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
- PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds);
- }
-
- return PartPtr;
- };
-
// Handle Stores:
if (SI) {
State.setDebugLocFrom(SI->getDebugLoc());
@@ -9543,8 +9512,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
- auto *VecPtr =
- CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ auto *VecPtr = State.get(getAddr(), Part);
if (isMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
BlockInMaskParts[Part]);
@@ -9568,8 +9536,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
nullptr, "wide.masked.gather");
State.addMetadata(NewLI, LI);
} else {
- auto *VecPtr =
- CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ auto *VecPtr = State.get(getAddr(), Part);
if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(
DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 32913b3f5569..304991526064 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4925,36 +4925,34 @@ void BoUpSLP::buildExternalUses(
LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
Instruction *UserInst = dyn_cast<Instruction>(U);
- if (!UserInst)
+ if (!UserInst || isDeleted(UserInst))
continue;
- if (isDeleted(UserInst))
+ // Ignore users in the user ignore list.
+ if (UserIgnoreList && UserIgnoreList->contains(UserInst))
continue;
// Skip in-tree scalars that become vectors
if (TreeEntry *UseEntry = getTreeEntry(U)) {
- Value *UseScalar = UseEntry->Scalars[0];
// Some in-tree scalars will remain as scalar in vectorized
- // instructions. If that is the case, the one in Lane 0 will
+ // instructions. If that is the case, the one in FoundLane will
// be used.
- if (UseScalar != U ||
- UseEntry->State == TreeEntry::ScatterVectorize ||
+ if (UseEntry->State == TreeEntry::ScatterVectorize ||
UseEntry->State == TreeEntry::PossibleStridedVectorize ||
- !doesInTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
+ !doesInTreeUserNeedToExtract(
+ Scalar, cast<Instruction>(UseEntry->Scalars.front()), TLI)) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
continue;
}
+ U = nullptr;
}
- // Ignore users in the user ignore list.
- if (UserIgnoreList && UserIgnoreList->contains(UserInst))
- continue;
-
- LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
- << Lane << " from " << *Scalar << ".\n");
- ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane));
+ LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst
+ << " from lane " << Lane << " from " << *Scalar
+ << ".\n");
+ ExternalUses.emplace_back(Scalar, U, FoundLane);
}
}
}
@@ -8384,6 +8382,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
(void)E;
return TTI->getInstructionCost(VI, CostKind);
};
+ // FIXME: Workaround for syntax error reported by MSVC buildbots.
+ TargetTransformInfo &TTIRef = *TTI;
// Need to clear CommonCost since the final shuffle cost is included into
// vector cost.
auto GetVectorCost = [&](InstructionCost) {
@@ -8398,14 +8398,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// No need to add new vector costs here since we're going to reuse
// same main/alternate vector ops, just do different shuffling.
} else if (Instruction::isBinaryOp(E->getOpcode())) {
- VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
+ VecCost =
+ TTIRef.getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost +=
- TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
+ TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
- VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
- CI0->getPredicate(), CostKind, VL0);
- VecCost += TTI->getCmpSelInstrCost(
+ VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
+ CI0->getPredicate(), CostKind, VL0);
+ VecCost += TTIRef.getCmpSelInstrCost(
E->getOpcode(), VecTy, MaskTy,
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
E->getAltOp());
@@ -8414,10 +8415,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
- VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
- TTI::CastContextHint::None, CostKind);
- VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
- TTI::CastContextHint::None, CostKind);
+ VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
+ TTI::CastContextHint::None, CostKind);
+ VecCost +=
+ TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
+ TTI::CastContextHint::None, CostKind);
}
SmallVector<int> Mask;
E->buildAltOpShuffleMask(
@@ -8426,8 +8428,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return I->getOpcode() == E->getAltOpcode();
},
Mask);
- VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- FinalVecTy, Mask);
+ VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ FinalVecTy, Mask);
+ // Patterns like [fadd,fsub] can be combined into a single instruction
+ // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we
+ // need to take into account their order when looking for the most used
+ // order.
+ unsigned Opcode0 = E->getOpcode();
+ unsigned Opcode1 = E->getAltOpcode();
+ // The opcode mask selects between the two opcodes.
+ SmallBitVector OpcodeMask(E->Scalars.size(), false);
+ for (unsigned Lane : seq<unsigned>(0, E->Scalars.size()))
+ if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1)
+ OpcodeMask.set(Lane);
+ // If this pattern is supported by the target then we consider the
+ // order.
+ if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
+ InstructionCost AltVecCost = TTIRef.getAltInstrCost(
+ VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
+ return AltVecCost < VecCost ? AltVecCost : VecCost;
+ }
+ // TODO: Check the reverse order too.
return VecCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
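
Note: OpcodeMask carries one bit per lane, set where that lane's scalar uses the alternate opcode; isLegalAltInstr then asks the target whether that exact interleaving maps onto a single instruction (the combined add/sub patterns on x86 mentioned in the comment above). A minimal sketch of the mask construction (illustrative types):

#include <bitset>
#include <cassert>
#include <vector>

int main() {
  // One bit per lane, set when the lane's scalar uses the alternate
  // opcode, e.g. [fadd, fsub, fadd, fsub] with fsub as Opcode1.
  enum Op { FAdd, FSub };
  std::vector<Op> Scalars = {FAdd, FSub, FAdd, FSub};
  std::bitset<4> OpcodeMask;
  for (unsigned Lane = 0; Lane < Scalars.size(); ++Lane)
    if (Scalars[Lane] == FSub)
      OpcodeMask.set(Lane);
  assert(OpcodeMask.to_ulong() == 0b1010u); // Lanes 1 and 3 use fsub.
  return 0;
}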
@@ -11493,17 +11514,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Value *PO = LI->getPointerOperand();
if (E->State == TreeEntry::Vectorize) {
NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign());
-
- // The pointer operand uses an in-tree scalar so we add the new
- // LoadInst to ExternalUses list to make sure that an extract will
- // be generated in the future.
- if (isa<Instruction>(PO)) {
- if (TreeEntry *Entry = getTreeEntry(PO)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(PO);
- ExternalUses.emplace_back(PO, NewLI, FoundLane);
- }
- }
} else {
assert((E->State == TreeEntry::ScatterVectorize ||
E->State == TreeEntry::PossibleStridedVectorize) &&
@@ -11539,17 +11549,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
StoreInst *ST =
Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign());
- // The pointer operand uses an in-tree scalar, so add the new StoreInst to
- // ExternalUses to make sure that an extract will be generated in the
- // future.
- if (isa<Instruction>(Ptr)) {
- if (TreeEntry *Entry = getTreeEntry(Ptr)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(Ptr);
- ExternalUses.push_back(ExternalUser(Ptr, ST, FoundLane));
- }
- }
-
Value *V = propagateMetadata(ST, E->Scalars);
E->VectorizedValue = V;
@@ -11597,10 +11596,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E);
- Intrinsic::ID IID = Intrinsic::not_intrinsic;
- if (Function *FI = CI->getCalledFunction())
- IID = FI->getIntrinsicID();
-
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
@@ -11611,18 +11606,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
SmallVector<Value *> OpVecs;
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+ if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
TysForDecl.push_back(
FixedVectorType::get(CI->getType(), E->Scalars.size()));
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
- if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, I)) {
+ if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
CallInst *CEI = cast<CallInst>(VL0);
ScalarArg = CEI->getArgOperand(I);
OpVecs.push_back(CEI->getArgOperand(I));
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(ScalarArg->getType());
continue;
}
@@ -11634,7 +11629,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
}
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
+ if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(OpVec->getType());
}
@@ -11654,18 +11649,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
CI->getOperandBundlesAsDefs(OpBundles);
Value *V = Builder.CreateCall(CF, OpVecs, OpBundles);
- // The scalar argument uses an in-tree scalar so we add the new vectorized
- // call to ExternalUses list to make sure that an extract will be
- // generated in the future.
- if (isa_and_present<Instruction>(ScalarArg)) {
- if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
- ExternalUses.push_back(
- ExternalUser(ScalarArg, cast<User>(V), FoundLane));
- }
- }
-
propagateIRFlags(V, E->Scalars, VL0);
V = FinalShuffle(V, E, VecTy, IsSigned);
@@ -11877,6 +11860,7 @@ Value *BoUpSLP::vectorizeTree(
DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs;
SmallDenseSet<Value *, 4> UsedInserts;
DenseMap<Value *, Value *> VectorCasts;
+ SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
@@ -11947,13 +11931,27 @@ Value *BoUpSLP::vectorizeTree(
VectorToInsertElement.try_emplace(Vec, IE);
return Vec;
};
- // If User == nullptr, the Scalar is used as extra arg. Generate
- // ExtractElement instruction and update the record for this scalar in
- // ExternallyUsedValues.
+ // If User == nullptr, the Scalar remains as scalar in vectorized
+ // instructions or is used as extra arg. Generate ExtractElement instruction
+ // and update the record for this scalar in ExternallyUsedValues.
if (!User) {
- assert(ExternallyUsedValues.count(Scalar) &&
- "Scalar with nullptr as an external user must be registered in "
- "ExternallyUsedValues map");
+ if (!ScalarsWithNullptrUser.insert(Scalar).second)
+ continue;
+ assert((ExternallyUsedValues.count(Scalar) ||
+ any_of(Scalar->users(),
+ [&](llvm::User *U) {
+ TreeEntry *UseEntry = getTreeEntry(U);
+ return UseEntry &&
+ UseEntry->State == TreeEntry::Vectorize &&
+ E->State == TreeEntry::Vectorize &&
+ doesInTreeUserNeedToExtract(
+ Scalar,
+ cast<Instruction>(UseEntry->Scalars.front()),
+ TLI);
+ })) &&
+ "Scalar with nullptr User must be registered in "
+ "ExternallyUsedValues map or remain as scalar in vectorized "
+ "instructions");
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
if (auto *PHI = dyn_cast<PHINode>(VecI))
Builder.SetInsertPoint(PHI->getParent(),
@@ -16222,7 +16220,7 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
for (auto *V : Candidates) {
auto *GEP = cast<GetElementPtrInst>(V);
auto *GEPIdx = GEP->idx_begin()->get();
- assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx));
+ assert(GEP->getNumIndices() == 1 && !isa<Constant>(GEPIdx));
Bundle[BundleIndex++] = GEPIdx;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
index 94cb76889813..7d33baac52c9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1357,6 +1357,36 @@ public:
#endif
};
+/// A recipe to compute the pointers for widened memory accesses of IndexTy for
+/// all parts. If IsReverse is true, compute pointers for accessing the input in
+/// reverse order per part.
+class VPVectorPointerRecipe : public VPRecipeBase, public VPValue {
+ Type *IndexedTy;
+ bool IsReverse;
+
+public:
+ VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
+ DebugLoc DL)
+ : VPRecipeBase(VPDef::VPVectorPointerSC, {Ptr}, DL), VPValue(this),
+ IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+
+ VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
+
+ void execute(VPTransformState &State) override;
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 02e400d590be..76961629aece 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -498,16 +498,17 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
"DbgInfoIntrinsic should have been dropped during VPlan construction");
State.setDebugLocFrom(CI.getDebugLoc());
+ bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
FunctionType *VFTy = nullptr;
if (Variant)
VFTy = Variant->getFunctionType();
for (unsigned Part = 0; Part < State.UF; ++Part) {
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
- if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
+ if (UseIntrinsic &&
+ isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
TysForDecl.push_back(
VectorType::get(CI.getType()->getScalarType(), State.VF));
- }
SmallVector<Value *, 4> Args;
for (const auto &I : enumerate(operands())) {
// Some intrinsics have a scalar argument - don't replace it with a
@@ -516,18 +517,19 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
// e.g. linear parameters for pointers.
Value *Arg;
if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) ||
- (VectorIntrinsicID != Intrinsic::not_intrinsic &&
+ (UseIntrinsic &&
isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())))
Arg = State.get(I.value(), VPIteration(0, 0));
else
Arg = State.get(I.value(), Part);
- if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
+ if (UseIntrinsic &&
+ isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
TysForDecl.push_back(Arg->getType());
Args.push_back(Arg);
}
Function *VectorF;
- if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
+ if (UseIntrinsic) {
// Use vector version of the intrinsic.
Module *M = State.Builder.GetInsertBlock()->getModule();
VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
@@ -1209,6 +1211,59 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPVectorPointerRecipe::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // Calculate the pointer for the specific unroll-part.
+ Value *PartPtr = nullptr;
+ // Use i32 for the gep index type when the value is constant,
+ // or query DataLayout for a more suitable index type otherwise.
+ const DataLayout &DL =
+ Builder.GetInsertBlock()->getModule()->getDataLayout();
+ Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
+ ? DL.getIndexType(IndexedTy->getPointerTo())
+ : Builder.getInt32Ty();
+ Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
+ bool InBounds = false;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+ InBounds = GEP->isInBounds();
+ if (IsReverse) {
+ // If the address is consecutive but reversed, then the
+ // wide store needs to start at the last vector element.
+ // RunTimeVF = VScale * VF.getKnownMinValue()
+ // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+ Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
+ // NumElt = -Part * RunTimeVF
+ Value *NumElt = Builder.CreateMul(
+ ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane =
+ Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+ PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+ PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
+ } else {
+ Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
+ PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
+ }
+
+ State.set(this, PartPtr, Part);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = vector-pointer ";
+ if (IsReverse)
+ O << "(reverse) ";
+
+ printOperands(O, SlotTracker);
+}
+#endif
+
void VPBlendRecipe::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
// We know that all PHIs in non-header blocks are converted into
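
Note: in the reverse branch above, the part pointer ends up at Ptr + (-Part * RunTimeVF) + (1 - RunTimeVF), i.e. each wide reversed access starts at its lowest-addressed lane, Part groups of VF elements below the scalar pointer. Specialized to a fixed VF, the arithmetic reduces to this (illustrative check):

#include <cassert>

int main() {
  // Reverse vector-pointer arithmetic from VPVectorPointerRecipe::execute,
  // with a fixed VF: part P's wide access starts at
  //   Ptr + (-P * VF) + (1 - VF)
  // and covers the VF elements ending at Ptr - P*VF.
  const int VF = 4;
  int data[16];
  int *Ptr = &data[11]; // Last element touched by part 0.
  for (int Part = 0; Part < 2; ++Part) {
    int *PartPtr = Ptr + (-Part * VF) + (1 - VF);
    // Part 0 spans data[8..11], part 1 spans data[4..7].
    assert(PartPtr == &data[11 - Part * VF - (VF - 1)]);
  }
  return 0;
}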
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 116acad8e8f3..8cc98f4abf93 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -351,6 +351,7 @@ public:
VPReductionSC,
VPReplicateSC,
VPScalarIVStepsSC,
+ VPVectorPointerSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,