Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms')
17 files changed, 534 insertions, 308 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index f37b4dc938d3..529f7309a1a2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2951,9 +2951,11 @@ void coro::salvageDebugInfo( // dbg.declare does. if (isa<DbgDeclareInst>(DVI)) { std::optional<BasicBlock::iterator> InsertPt; - if (auto *I = dyn_cast<Instruction>(Storage)) + if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); - else if (isa<Argument>(Storage)) + if (!OptimizeFrame && I->getDebugLoc()) + DVI.setDebugLoc(I->getDebugLoc()); + } else if (isa<Argument>(Storage)) InsertPt = F->getEntryBlock().begin(); if (InsertPt) DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt); diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 7c277518b21d..7ebf265e17ba 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -76,6 +76,7 @@ STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); +STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef"); STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); STATISTIC(NumNoFree, "Number of functions marked as nofree"); @@ -1279,6 +1280,45 @@ static void addNonNullAttrs(const SCCNodeSet &SCCNodes, } } +/// Deduce noundef attributes for the SCC. +static void addNoUndefAttrs(const SCCNodeSet &SCCNodes, + SmallSet<Function *, 8> &Changed) { + // Check each function in turn, determining which functions return noundef + // values. + for (Function *F : SCCNodes) { + // Already noundef. + if (F->getAttributes().hasRetAttr(Attribute::NoUndef)) + continue; + + // We can infer and propagate function attributes only when we know that the + // definition we'll get at link time is *exactly* the definition we see now. + // For more details, see GlobalValue::mayBeDerefined. + if (!F->hasExactDefinition()) + return; + + // MemorySanitizer assumes that the definition and declaration of a + // function will be consistent. A function with sanitize_memory attribute + // should be skipped from inference. + if (F->hasFnAttribute(Attribute::SanitizeMemory)) + continue; + + if (F->getReturnType()->isVoidTy()) + continue; + + if (all_of(*F, [](BasicBlock &BB) { + if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { + // TODO: perform context-sensitive analysis? 
+ return isGuaranteedNotToBeUndefOrPoison(Ret->getReturnValue()); + } + return true; + })) { + F->addRetAttr(Attribute::NoUndef); + ++NumNoUndefReturn; + Changed.insert(F); + } + } +} + namespace { /// Collects a set of attribute inference requests and performs them all in one @@ -1629,7 +1669,10 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, for (auto &I : BB.instructionsWithoutDebug()) if (auto *CB = dyn_cast<CallBase>(&I)) { Function *Callee = CB->getCalledFunction(); - if (!Callee || Callee == F || !Callee->doesNotRecurse()) + if (!Callee || Callee == F || + (!Callee->doesNotRecurse() && + !(Callee->isDeclaration() && + Callee->hasFnAttribute(Attribute::NoCallback)))) // Function calls a potentially recursive function. return; } @@ -1785,6 +1828,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter, inferConvergent(Nodes.SCCNodes, Changed); addNoReturnAttrs(Nodes.SCCNodes, Changed); addWillReturn(Nodes.SCCNodes, Changed); + addNoUndefAttrs(Nodes.SCCNodes, Changed); // If we have no external nodes participating in the SCC, we can deduce some // more precise attributes as well. diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 719a2678fc18..556fde37efeb 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1685,8 +1685,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { assert(NotLHS != nullptr && NotRHS != nullptr && "isFreeToInvert desynced with getFreelyInverted"); Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS); - return BinaryOperator::CreateSub(ConstantInt::get(RHS->getType(), -2), - LHSPlusRHS); + return BinaryOperator::CreateSub( + ConstantInt::getSigned(RHS->getType(), -2), LHSPlusRHS); } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 63b1e0f64a88..c03f50d75814 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3513,9 +3513,13 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(Op0, C); // ((B | C) & A) | B -> B | (A & C) - if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) + if (match(Op0, m_c_And(m_c_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C)); + // B | ((B | C) & A) -> B | (A & C) + if (match(Op1, m_c_And(m_c_Or(m_Specific(Op0), m_Value(C)), m_Value(A)))) + return BinaryOperator::CreateOr(Op0, Builder.CreateAnd(A, C)); + if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this)) return DeMorgan; @@ -3872,6 +3876,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { } } + // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2 + if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C1)))) && + match(Op1, m_APInt(C2))) { + KnownBits KnownX = computeKnownBits(X, /*Depth*/ 0, &I); + if ((KnownX.One & *C2) == *C2) + return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2)); + } + return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3b7fe7fa2266..43d4496571be 
100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3850,6 +3850,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { if (Callee->hasFnAttribute("thunk")) return false; + // If this is a call to a naked function, the assembly might be + // using an argument, or otherwise rely on the frame layout, + // the function prototype will mismatch. + if (Callee->hasFnAttribute(Attribute::Naked)) + return false; + // If this is a musttail call, the callee's prototype must match the caller's // prototype with the exception of pointee types. The code below doesn't // implement that, so we can't do this transform. diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 289976718e52..3875e59c3ede 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -111,8 +111,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() || - GV->getValueType() != GEP->getSourceElementType() || - !GV->isConstant() || !GV->hasDefinitiveInitializer()) + GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() || + !GV->hasDefinitiveInitializer()) return nullptr; Constant *Init = GV->getInitializer(); @@ -128,8 +128,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( // the simple index into a single-dimensional array. // // Require: GEP GV, 0, i {{, constant indices}} - if (GEP->getNumOperands() < 3 || - !isa<ConstantInt>(GEP->getOperand(1)) || + if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) || !cast<ConstantInt>(GEP->getOperand(1))->isZero() || isa<Constant>(GEP->getOperand(2))) return nullptr; @@ -142,15 +141,18 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Type *EltTy = Init->getType()->getArrayElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (!Idx) return nullptr; // Variable index. + if (!Idx) + return nullptr; // Variable index. uint64_t IdxVal = Idx->getZExtValue(); - if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. + if ((unsigned)IdxVal != IdxVal) + return nullptr; // Too large array index. if (StructType *STy = dyn_cast<StructType>(EltTy)) EltTy = STy->getElementType(IdxVal); else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { - if (IdxVal >= ATy->getNumElements()) return nullptr; + if (IdxVal >= ATy->getNumElements()) + return nullptr; EltTy = ATy->getElementType(); } else { return nullptr; // Unknown type. @@ -191,7 +193,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { Constant *Elt = Init->getAggregateElement(i); - if (!Elt) return nullptr; + if (!Elt) + return nullptr; // If this is indexing an array of structures, get the structure element. 
if (!LaterIndices.empty()) { @@ -214,16 +217,17 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( if (isa<UndefValue>(C)) { // Extend range state machines to cover this element in case there is an // undef in the middle of the range. - if (TrueRangeEnd == (int)i-1) + if (TrueRangeEnd == (int)i - 1) TrueRangeEnd = i; - if (FalseRangeEnd == (int)i-1) + if (FalseRangeEnd == (int)i - 1) FalseRangeEnd = i; continue; } // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. - if (!isa<ConstantInt>(C)) return nullptr; + if (!isa<ConstantInt>(C)) + return nullptr; // Otherwise, we know if the comparison is true or false for this element, // update our state machines. @@ -233,7 +237,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( if (IsTrueForElt) { // Update the TrueElement state machine. if (FirstTrueElement == Undefined) - FirstTrueElement = TrueRangeEnd = i; // First true element. + FirstTrueElement = TrueRangeEnd = i; // First true element. else { // Update double-compare state machine. if (SecondTrueElement == Undefined) @@ -242,7 +246,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( SecondTrueElement = Overdefined; // Update range state machine. - if (TrueRangeEnd == (int)i-1) + if (TrueRangeEnd == (int)i - 1) TrueRangeEnd = i; else TrueRangeEnd = Overdefined; @@ -259,7 +263,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( SecondFalseElement = Overdefined; // Update range state machine. - if (FalseRangeEnd == (int)i-1) + if (FalseRangeEnd == (int)i - 1) FalseRangeEnd = i; else FalseRangeEnd = Overdefined; @@ -348,7 +352,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( // False for two elements -> 'i != 47 & i != 72'. Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx); - Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); + Value *SecondFalseIdx = + ConstantInt::get(Idx->getType(), SecondFalseElement); Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } @@ -365,8 +370,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Idx = Builder.CreateAdd(Idx, Offs); } - Value *End = ConstantInt::get(Idx->getType(), - TrueRangeEnd-FirstTrueElement+1); + Value *End = + ConstantInt::get(Idx->getType(), TrueRangeEnd - FirstTrueElement + 1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } @@ -380,8 +385,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Idx = Builder.CreateAdd(Idx, Offs); } - Value *End = ConstantInt::get(Idx->getType(), - FalseRangeEnd-FirstFalseElement); + Value *End = + ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } @@ -4624,27 +4629,35 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, } bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; - if (BO0 && isa<OverflowingBinaryOperator>(BO0)) - NoOp0WrapProblem = - ICmpInst::isEquality(Pred) || - (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || - (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); - if (BO1 && isa<OverflowingBinaryOperator>(BO1)) - NoOp1WrapProblem = - ICmpInst::isEquality(Pred) || - (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || - (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); - + bool Op0HasNUW = false, Op1HasNUW = false; + bool Op0HasNSW = false, Op1HasNSW = false; // Analyze the case when either Op0 or Op1 is an add instruction. 
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). + auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred, + bool &HasNSW, bool &HasNUW) -> bool { + if (isa<OverflowingBinaryOperator>(BO)) { + HasNUW = BO.hasNoUnsignedWrap(); + HasNSW = BO.hasNoSignedWrap(); + return ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && HasNUW) || + (CmpInst::isSigned(Pred) && HasNSW); + } else if (BO.getOpcode() == Instruction::Or) { + HasNUW = true; + HasNSW = true; + return true; + } else { + return false; + } + }; Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; - if (BO0 && BO0->getOpcode() == Instruction::Add) { - A = BO0->getOperand(0); - B = BO0->getOperand(1); + + if (BO0) { + match(BO0, m_AddLike(m_Value(A), m_Value(B))); + NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW); } - if (BO1 && BO1->getOpcode() == Instruction::Add) { - C = BO1->getOperand(0); - D = BO1->getOperand(1); + if (BO1) { + match(BO1, m_AddLike(m_Value(C), m_Value(D))); + NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW); } // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow. @@ -4764,17 +4777,15 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, APInt AP2Abs = AP2->abs(); if (AP1Abs.uge(AP2Abs)) { APInt Diff = *AP1 - *AP2; - bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1); - bool HasNSW = BO0->hasNoSignedWrap(); Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff); - Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW); + Value *NewAdd = Builder.CreateAdd( + A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW); return new ICmpInst(Pred, NewAdd, C); } else { APInt Diff = *AP2 - *AP1; - bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2); - bool HasNSW = BO1->hasNoSignedWrap(); Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff); - Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW); + Value *NewAdd = Builder.CreateAdd( + C, C3, "", Op1HasNUW && Diff.ule(*AP2), Op1HasNSW); return new ICmpInst(Pred, A, NewAdd); } } @@ -4868,16 +4879,14 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); // if Z != 0 and nsw(X * Z) and nsw(Y * Z) // X * Z eq/ne Y * Z -> X eq/ne Y - if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() && - BO1->hasNoSignedWrap()) + if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW) return new ICmpInst(Pred, X, Y); } else NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); // If Z != 0 and nuw(X * Z) and nuw(Y * Z) // X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y - if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() && - BO1->hasNoUnsignedWrap()) + if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW) return new ICmpInst(Pred, X, Y); } } @@ -4966,7 +4975,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); case Instruction::SDiv: - if (!I.isEquality() || !BO0->isExact() || !BO1->isExact()) + if (!(I.isEquality() || match(BO0->getOperand(1), m_NonNegative())) || + !BO0->isExact() || !BO1->isExact()) break; return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); @@ -4976,8 +4986,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); case Instruction::Shl: { - bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); - bool NSW = BO0->hasNoSignedWrap() && 
BO1->hasNoSignedWrap(); + bool NUW = Op0HasNUW && Op1HasNUW; + bool NSW = Op0HasNSW && Op1HasNSW; if (!NUW && !NSW) break; if (!NSW && I.isSigned()) @@ -5029,10 +5039,10 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, } /// Fold icmp Pred min|max(X, Y), Z. -Instruction * -InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I, - MinMaxIntrinsic *MinMax, Value *Z, - ICmpInst::Predicate Pred) { +Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I, + MinMaxIntrinsic *MinMax, + Value *Z, + ICmpInst::Predicate Pred) { Value *X = MinMax->getLHS(); Value *Y = MinMax->getRHS(); if (ICmpInst::isSigned(Pred) && !MinMax->isSigned()) @@ -5161,24 +5171,6 @@ InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I, return nullptr; } -Instruction *InstCombinerImpl::foldICmpWithMinMax(ICmpInst &Cmp) { - ICmpInst::Predicate Pred = Cmp.getPredicate(); - Value *Lhs = Cmp.getOperand(0); - Value *Rhs = Cmp.getOperand(1); - - if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Lhs)) { - if (Instruction *Res = foldICmpWithMinMaxImpl(Cmp, MinMax, Rhs, Pred)) - return Res; - } - - if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Rhs)) { - if (Instruction *Res = foldICmpWithMinMaxImpl( - Cmp, MinMax, Lhs, ICmpInst::getSwappedPredicate(Pred))) - return Res; - } - - return nullptr; -} // Canonicalize checking for a power-of-2-or-zero value: static Instruction *foldICmpPow2Test(ICmpInst &I, @@ -6843,6 +6835,34 @@ static Instruction *foldReductionIdiom(ICmpInst &I, return nullptr; } +// This helper will be called with icmp operands in both orders. +Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred, + Value *Op0, Value *Op1, + ICmpInst &CxtI) { + // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'. + if (auto *GEP = dyn_cast<GEPOperator>(Op0)) + if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI)) + return NI; + + if (auto *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI)) + return NI; + + if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0)) + if (Instruction *Res = foldICmpWithMinMax(CxtI, MinMax, Op1, Pred)) + return Res; + + { + Value *X; + const APInt *C; + // icmp X+Cst, X + if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X) + return foldICmpAddOpConst(X, *C, Pred); + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { bool Changed = false; const SimplifyQuery Q = SQ.getWithInstruction(&I); @@ -6966,20 +6986,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpInstWithConstantNotInt(I)) return Res; - // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'. - if (auto *GEP = dyn_cast<GEPOperator>(Op0)) - if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I)) - return NI; - if (auto *GEP = dyn_cast<GEPOperator>(Op1)) - if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I)) - return NI; - - if (auto *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *NI = foldSelectICmp(I.getPredicate(), SI, Op1, I)) - return NI; - if (auto *SI = dyn_cast<SelectInst>(Op1)) - if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I)) - return NI; + if (Instruction *Res = foldICmpCommutative(I.getPredicate(), Op0, Op1, I)) + return Res; + if (Instruction *Res = + foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I)) + return Res; // In case of a comparison with two select instructions having the same // condition, check whether one of the resulting branches can be simplified. 
@@ -7030,9 +7041,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *R = foldICmpWithCastOp(I)) return R; - if (Instruction *Res = foldICmpWithMinMax(I)) - return Res; - { Value *X, *Y; // Transform (X & ~Y) == 0 --> (X & Y) != 0 @@ -7134,18 +7142,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { !ACXI->isWeak()) return ExtractValueInst::Create(ACXI, 1); - { - Value *X; - const APInt *C; - // icmp X+Cst, X - if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X) - return foldICmpAddOpConst(X, *C, I.getPredicate()); - - // icmp X, X+Cst - if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X) - return foldICmpAddOpConst(X, *C, I.getSwappedPredicate()); - } - if (Instruction *Res = foldICmpWithHighBitMask(I, Builder)) return Res; diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 9e76a0cf17b1..bdaf7550b4b4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -648,9 +648,8 @@ public: Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp, const APInt &C); Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); - Instruction *foldICmpWithMinMaxImpl(Instruction &I, MinMaxIntrinsic *MinMax, - Value *Z, ICmpInst::Predicate Pred); - Instruction *foldICmpWithMinMax(ICmpInst &Cmp); + Instruction *foldICmpWithMinMax(Instruction &I, MinMaxIntrinsic *MinMax, + Value *Z, ICmpInst::Predicate Pred); Instruction *foldICmpEquality(ICmpInst &Cmp); Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); Instruction *foldSignBitTest(ICmpInst &I); @@ -708,6 +707,8 @@ public: const APInt &C); Instruction *foldICmpBitCast(ICmpInst &Cmp); Instruction *foldICmpWithTrunc(ICmpInst &Cmp); + Instruction *foldICmpCommutative(ICmpInst::Predicate Pred, Value *Op0, + Value *Op1, ICmpInst &CxtI); // Helpers of visitSelectInst(). Instruction *foldSelectOfBools(SelectInst &SI); diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 20bf00344b14..ab55f235920a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1171,14 +1171,15 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, return nullptr; } -static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, - InstCombinerImpl &IC) { +static Value *canonicalizeSPF(ICmpInst &Cmp, Value *TrueVal, Value *FalseVal, + InstCombinerImpl &IC) { Value *LHS, *RHS; // TODO: What to do with pointer min/max patterns? 
- if (!Sel.getType()->isIntOrIntVectorTy()) + if (!TrueVal->getType()->isIntOrIntVectorTy()) return nullptr; - SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor; + SelectPatternFlavor SPF = + matchDecomposedSelectPattern(&Cmp, TrueVal, FalseVal, LHS, RHS).Flavor; if (SPF == SelectPatternFlavor::SPF_ABS || SPF == SelectPatternFlavor::SPF_NABS) { if (!Cmp.hasOneUse() && !RHS->hasOneUse()) @@ -1188,13 +1189,13 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && match(RHS, m_NSWNeg(m_Specific(LHS))); Constant *IntMinIsPoisonC = - ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); + ConstantInt::get(Type::getInt1Ty(Cmp.getContext()), IntMinIsPoison); Instruction *Abs = IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); if (SPF == SelectPatternFlavor::SPF_NABS) - return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! - return IC.replaceInstUsesWith(Sel, Abs); + return IC.Builder.CreateNeg(Abs); // Always without NSW flag! + return Abs; } if (SelectPatternResult::isMinOrMax(SPF)) { @@ -1215,8 +1216,7 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, default: llvm_unreachable("Unexpected SPF"); } - return IC.replaceInstUsesWith( - Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS)); + return IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS); } return nullptr; @@ -1677,8 +1677,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI)) return NewSel; - if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this)) - return NewSPF; + if (Value *V = + canonicalizeSPF(*ICI, SI.getTrueValue(), SI.getFalseValue(), *this)) + return replaceInstUsesWith(SI, V); if (Value *V = foldSelectInstWithICmpConst(SI, ICI, Builder)) return replaceInstUsesWith(SI, V); @@ -2363,6 +2364,9 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, Value *FVal = Sel.getFalseValue(); Type *SelType = Sel.getType(); + if (ICmpInst::makeCmpResultType(TVal->getType()) != Cond->getType()) + return nullptr; + // Match select ?, TC, FC where the constants are equal but negated. // TODO: Generalize to handle a negated variable operand? const APFloat *TC, *FC; @@ -3790,5 +3794,50 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *I = foldBitCeil(SI, Builder)) return I; + // Fold: + // (select A && B, T, F) -> (select A, (select B, T, F), F) + // (select A || B, T, F) -> (select A, T, (select B, T, F)) + // if (select B, T, F) is foldable. + // TODO: preserve FMF flags + auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A, + Value *B) -> Instruction * { + if (Value *V = simplifySelectInst(B, TrueVal, FalseVal, + SQ.getWithInstruction(&SI))) + return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V); + + // Is (select B, T, F) a SPF? + if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) { + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(B)) + if (Value *V = canonicalizeSPF(*Cmp, TrueVal, FalseVal, *this)) + return SelectInst::Create(A, IsAnd ? V : TrueVal, + IsAnd ? 
FalseVal : V); + } + + return nullptr; + }; + + Value *LHS, *RHS; + if (match(CondVal, m_And(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS)) + return I; + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, RHS, LHS)) + return I; + } else if (match(CondVal, m_Or(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS)) + return I; + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, RHS, LHS)) + return I; + } else { + // We cannot swap the operands of logical and/or. + // TODO: Can we swap the operands by inserting a freeze? + if (match(CondVal, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS)) + return I; + } else if (match(CondVal, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS)) + return I; + } + } + return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 7f5a7b666903..351fc3b0174f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2469,31 +2469,43 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { DL.getIndexSizeInBits(AS)) { uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue(); - bool Matched = false; - uint64_t C; - Value *V = nullptr; if (TyAllocSize == 1) { - V = GEP.getOperand(1); - Matched = true; - } else if (match(GEP.getOperand(1), - m_AShr(m_Value(V), m_ConstantInt(C)))) { - if (TyAllocSize == 1ULL << C) - Matched = true; - } else if (match(GEP.getOperand(1), - m_SDiv(m_Value(V), m_ConstantInt(C)))) { - if (TyAllocSize == C) - Matched = true; + // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), + // but only if the result pointer is only used as if it were an integer, + // or both point to the same underlying object (otherwise provenance is + // not necessarily retained). + Value *X = GEP.getPointerOperand(); + Value *Y; + if (match(GEP.getOperand(1), + m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) && + GEPType == Y->getType()) { + bool HasSameUnderlyingObject = + getUnderlyingObject(X) == getUnderlyingObject(Y); + bool Changed = false; + GEP.replaceUsesWithIf(Y, [&](Use &U) { + bool ShouldReplace = HasSameUnderlyingObject || + isa<ICmpInst>(U.getUser()) || + isa<PtrToIntInst>(U.getUser()); + Changed |= ShouldReplace; + return ShouldReplace; + }); + return Changed ? &GEP : nullptr; + } + } else { + // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V) + Value *V; + if ((has_single_bit(TyAllocSize) && + match(GEP.getOperand(1), + m_Exact(m_AShr(m_Value(V), + m_SpecificInt(countr_zero(TyAllocSize)))))) || + match(GEP.getOperand(1), + m_Exact(m_SDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) { + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + Builder.getInt8Ty(), GEP.getPointerOperand(), V); + NewGEP->setIsInBounds(GEP.isInBounds()); + return NewGEP; + } } - - // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but - // only if both point to the same underlying object (otherwise provenance - // is not necessarily retained). 
- Value *Y; - Value *X = GEP.getOperand(0); - if (Matched && - match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) && - getUnderlyingObject(X) == getUnderlyingObject(Y)) - return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType); } } // We do not handle pointer-vector geps here. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 899d7e0a11e6..06c87bd6dc37 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -366,6 +366,13 @@ struct Decomposition { append_range(Vars, Other.Vars); } + void sub(const Decomposition &Other) { + Decomposition Tmp = Other; + Tmp.mul(-1); + add(Tmp.Offset); + append_range(Vars, Tmp.Vars); + } + void mul(int64_t Factor) { Offset = multiplyWithOverflow(Offset, Factor); for (auto &Var : Vars) @@ -569,10 +576,12 @@ static Decomposition decompose(Value *V, return Result; } - if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) - return {-1 * CI->getSExtValue(), {{1, Op0}}}; - if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) - return {0, {{1, Op0}, {-1, Op1}}}; + if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) { + auto ResA = decompose(Op0, Preconditions, IsSigned, DL); + auto ResB = decompose(Op1, Preconditions, IsSigned, DL); + ResA.sub(ResB); + return ResA; + } return {V, IsKnownNonNegative}; } @@ -1010,22 +1019,14 @@ void State::addInfoFor(BasicBlock &BB) { continue; } - if (match(&I, m_Intrinsic<Intrinsic::ssub_with_overflow>())) { - WorkList.push_back( - FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I))); - continue; - } - - if (isa<MinMaxIntrinsic>(&I)) { - WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I)); - continue; - } - - Value *A, *B; - CmpInst::Predicate Pred; - // For now, just handle assumes with a single compare as condition. - if (match(&I, m_Intrinsic<Intrinsic::assume>( - m_ICmp(Pred, m_Value(A), m_Value(B))))) { + auto *II = dyn_cast<IntrinsicInst>(&I); + Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic; + switch (ID) { + case Intrinsic::assume: { + Value *A, *B; + CmpInst::Predicate Pred; + if (!match(I.getOperand(0), m_ICmp(Pred, m_Value(A), m_Value(B)))) + break; if (GuaranteedToExecute) { // The assume is guaranteed to execute when BB is entered, hence Cond // holds on entry to BB. @@ -1035,7 +1036,23 @@ void State::addInfoFor(BasicBlock &BB) { WorkList.emplace_back( FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I)); } + break; + } + // Enqueue ssub_with_overflow for simplification. + case Intrinsic::ssub_with_overflow: + WorkList.push_back( + FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I))); + break; + // Enqueue the intrinsics to add extra info. + case Intrinsic::abs: + case Intrinsic::umin: + case Intrinsic::umax: + case Intrinsic::smin: + case Intrinsic::smax: + WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I)); + break; } + GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I); } @@ -1693,6 +1710,13 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, ICmpInst::Predicate Pred; if (!CB.isConditionFact()) { + Value *X; + if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) { + // TODO: Add CB.Inst >= 0 fact. 
+ AddFact(CmpInst::ICMP_SGE, CB.Inst, X); + continue; + } + if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) { Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate()); AddFact(Pred, MinMax, MinMax->getLHS()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp index fb4d82885377..282c44563466 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp @@ -29,9 +29,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" @@ -66,19 +67,6 @@ class CanonicalizeFreezeInLoopsImpl { ScalarEvolution &SE; DominatorTree &DT; - struct FrozenIndPHIInfo { - // A freeze instruction that uses an induction phi - FreezeInst *FI = nullptr; - // The induction phi, step instruction, the operand idx of StepInst which is - // a step value - PHINode *PHI; - BinaryOperator *StepInst; - unsigned StepValIdx = 0; - - FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst) - : PHI(PHI), StepInst(StepInst) {} - }; - // Can freeze instruction be pushed into operands of I? // In order to do this, I should not create a poison after I's flags are // stripped. @@ -99,6 +87,46 @@ public: } // anonymous namespace +namespace llvm { + +struct FrozenIndPHIInfo { + // A freeze instruction that uses an induction phi + FreezeInst *FI = nullptr; + // The induction phi, step instruction, the operand idx of StepInst which is + // a step value + PHINode *PHI; + BinaryOperator *StepInst; + unsigned StepValIdx = 0; + + FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst) + : PHI(PHI), StepInst(StepInst) {} + + bool operator==(const FrozenIndPHIInfo &Other) { return FI == Other.FI; } +}; + +template <> struct DenseMapInfo<FrozenIndPHIInfo> { + static inline FrozenIndPHIInfo getEmptyKey() { + return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getEmptyKey(), + DenseMapInfo<BinaryOperator *>::getEmptyKey()); + } + + static inline FrozenIndPHIInfo getTombstoneKey() { + return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getTombstoneKey(), + DenseMapInfo<BinaryOperator *>::getTombstoneKey()); + } + + static unsigned getHashValue(const FrozenIndPHIInfo &Val) { + return DenseMapInfo<FreezeInst *>::getHashValue(Val.FI); + }; + + static bool isEqual(const FrozenIndPHIInfo &LHS, + const FrozenIndPHIInfo &RHS) { + return LHS.FI == RHS.FI; + }; +}; + +} // end namespace llvm + // Given U = (value, user), replace value with freeze(value), and let // SCEV forget user. The inserted freeze is placed in the preheader. 
void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) { @@ -126,7 +154,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() { if (!L->isLoopSimplifyForm()) return false; - SmallVector<FrozenIndPHIInfo, 4> Candidates; + SmallSetVector<FrozenIndPHIInfo, 4> Candidates; for (auto &PHI : L->getHeader()->phis()) { InductionDescriptor ID; @@ -155,7 +183,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() { if (auto *FI = dyn_cast<FreezeInst>(U)) { LLVM_DEBUG(dbgs() << "canonfr: found: " << *FI << "\n"); Info.FI = FI; - Candidates.push_back(Info); + Candidates.insert(Info); } }; for_each(PHI.users(), Visit); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index a758fb306982..c76cc9db16d7 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -3593,8 +3593,9 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C, if (isa<ConstantInt>(C)) return createIntegerExpression(C); - if (Ty.isFloatTy() || Ty.isDoubleTy()) { - const APFloat &APF = cast<ConstantFP>(&C)->getValueAPF(); + auto *FP = dyn_cast<ConstantFP>(&C); + if (FP && (Ty.isFloatTy() || Ty.isDoubleTy())) { + const APFloat &APF = FP->getValueAPF(); return DIB.createConstantValueExpression( APF.bitcastToAPInt().getZExtValue()); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f82e161fb846..8e135d80f4f2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8174,13 +8174,20 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, bool Consecutive = Reverse || Decision == LoopVectorizationCostModel::CM_Widen; + VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1]; + if (Consecutive) { + auto *VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), + Reverse, I->getDebugLoc()); + Builder.getInsertBlock()->appendRecipe(VectorPtr); + Ptr = VectorPtr; + } if (LoadInst *Load = dyn_cast<LoadInst>(I)) - return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask, - Consecutive, Reverse); + return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive, + Reverse); StoreInst *Store = cast<StoreInst>(I); - return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0], - Mask, Consecutive, Reverse); + return new VPWidenMemoryInstructionRecipe(*Store, Ptr, Operands[0], Mask, + Consecutive, Reverse); } /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also @@ -9475,8 +9482,8 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF); bool isMaskRequired = getMask(); if (isMaskRequired) { - // Mask reversal is only neede for non-all-one (null) masks, as reverse of a - // null all-one mask is a null mask. + // Mask reversal is only needed for non-all-one (null) masks, as reverse of + // a null all-one mask is a null mask. for (unsigned Part = 0; Part < State.UF; ++Part) { Value *Mask = State.get(getMask(), Part); if (isReverse()) @@ -9485,44 +9492,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { } } - const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * { - // Calculate the pointer for the specific unroll-part. 
- Value *PartPtr = nullptr; - - // Use i32 for the gep index type when the value is constant, - // or query DataLayout for a more suitable index type otherwise. - const DataLayout &DL = - Builder.GetInsertBlock()->getModule()->getDataLayout(); - Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0) - ? DL.getIndexType(PointerType::getUnqual( - ScalarDataTy->getContext())) - : Builder.getInt32Ty(); - bool InBounds = false; - if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) - InBounds = gep->isInBounds(); - if (isReverse()) { - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - // RunTimeVF = VScale * VF.getKnownMinValue() - // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() - Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); - // NumElt = -Part * RunTimeVF - Value *NumElt = - Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); - // LastLane = 1 - RunTimeVF - Value *LastLane = - Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); - PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds); - PartPtr = - Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds); - } else { - Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); - PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds); - } - - return PartPtr; - }; - // Handle Stores: if (SI) { State.setDebugLocFrom(SI->getDebugLoc()); @@ -9543,8 +9512,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { // We don't want to update the value in the map as it might be used in // another expression. So don't call resetVectorValue(StoredVal). } - auto *VecPtr = - CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); + auto *VecPtr = State.get(getAddr(), Part); if (isMaskRequired) NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, BlockInMaskParts[Part]); @@ -9568,8 +9536,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { nullptr, "wide.masked.gather"); State.addMetadata(NewLI, LI); } else { - auto *VecPtr = - CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); + auto *VecPtr = State.get(getAddr(), Part); if (isMaskRequired) NewLI = Builder.CreateMaskedLoad( DataTy, VecPtr, Alignment, BlockInMaskParts[Part], diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 32913b3f5569..304991526064 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4925,36 +4925,34 @@ void BoUpSLP::buildExternalUses( LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); Instruction *UserInst = dyn_cast<Instruction>(U); - if (!UserInst) + if (!UserInst || isDeleted(UserInst)) continue; - if (isDeleted(UserInst)) + // Ignore users in the user ignore list. + if (UserIgnoreList && UserIgnoreList->contains(UserInst)) continue; // Skip in-tree scalars that become vectors if (TreeEntry *UseEntry = getTreeEntry(U)) { - Value *UseScalar = UseEntry->Scalars[0]; // Some in-tree scalars will remain as scalar in vectorized - // instructions. If that is the case, the one in Lane 0 will + // instructions. If that is the case, the one in FoundLane will // be used. 
- if (UseScalar != U || - UseEntry->State == TreeEntry::ScatterVectorize || + if (UseEntry->State == TreeEntry::ScatterVectorize || UseEntry->State == TreeEntry::PossibleStridedVectorize || - !doesInTreeUserNeedToExtract(Scalar, UserInst, TLI)) { + !doesInTreeUserNeedToExtract( + Scalar, cast<Instruction>(UseEntry->Scalars.front()), TLI)) { LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U << ".\n"); assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state"); continue; } + U = nullptr; } - // Ignore users in the user ignore list. - if (UserIgnoreList && UserIgnoreList->contains(UserInst)) - continue; - - LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " - << Lane << " from " << *Scalar << ".\n"); - ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane)); + LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst + << " from lane " << Lane << " from " << *Scalar + << ".\n"); + ExternalUses.emplace_back(Scalar, U, FoundLane); } } } @@ -8384,6 +8382,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, (void)E; return TTI->getInstructionCost(VI, CostKind); }; + // FIXME: Workaround for syntax error reported by MSVC buildbots. + TargetTransformInfo &TTIRef = *TTI; // Need to clear CommonCost since the final shuffle cost is included into // vector cost. auto GetVectorCost = [&](InstructionCost) { @@ -8398,14 +8398,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, // No need to add new vector costs here since we're going to reuse // same main/alternate vector ops, just do different shuffling. } else if (Instruction::isBinaryOp(E->getOpcode())) { - VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); + VecCost = + TTIRef.getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); VecCost += - TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); + TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) { auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size()); - VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, - CI0->getPredicate(), CostKind, VL0); - VecCost += TTI->getCmpSelInstrCost( + VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, + CI0->getPredicate(), CostKind, VL0); + VecCost += TTIRef.getCmpSelInstrCost( E->getOpcode(), VecTy, MaskTy, cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind, E->getAltOp()); @@ -8414,10 +8415,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType(); auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size()); auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size()); - VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, - TTI::CastContextHint::None, CostKind); - VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, - TTI::CastContextHint::None, CostKind); + VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, + TTI::CastContextHint::None, CostKind); + VecCost += + TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, + TTI::CastContextHint::None, CostKind); } SmallVector<int> Mask; E->buildAltOpShuffleMask( @@ -8426,8 +8428,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, return I->getOpcode() == E->getAltOpcode(); }, Mask); - VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - FinalVecTy, Mask); + VecCost += 
TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, + FinalVecTy, Mask); + // Patterns like [fadd,fsub] can be combined into a single instruction + // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we + // need to take into account their order when looking for the most used + // order. + unsigned Opcode0 = E->getOpcode(); + unsigned Opcode1 = E->getAltOpcode(); + // The opcode mask selects between the two opcodes. + SmallBitVector OpcodeMask(E->Scalars.size(), false); + for (unsigned Lane : seq<unsigned>(0, E->Scalars.size())) + if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1) + OpcodeMask.set(Lane); + // If this pattern is supported by the target then we consider the + // order. + if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) { + InstructionCost AltVecCost = TTIRef.getAltInstrCost( + VecTy, Opcode0, Opcode1, OpcodeMask, CostKind); + return AltVecCost < VecCost ? AltVecCost : VecCost; + } + // TODO: Check the reverse order too. return VecCost; }; return GetCostDiff(GetScalarCost, GetVectorCost); @@ -11493,17 +11514,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *PO = LI->getPointerOperand(); if (E->State == TreeEntry::Vectorize) { NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign()); - - // The pointer operand uses an in-tree scalar so we add the new - // LoadInst to ExternalUses list to make sure that an extract will - // be generated in the future. - if (isa<Instruction>(PO)) { - if (TreeEntry *Entry = getTreeEntry(PO)) { - // Find which lane we need to extract. - unsigned FoundLane = Entry->findLaneForValue(PO); - ExternalUses.emplace_back(PO, NewLI, FoundLane); - } - } } else { assert((E->State == TreeEntry::ScatterVectorize || E->State == TreeEntry::PossibleStridedVectorize) && @@ -11539,17 +11549,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { StoreInst *ST = Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign()); - // The pointer operand uses an in-tree scalar, so add the new StoreInst to - // ExternalUses to make sure that an extract will be generated in the - // future. - if (isa<Instruction>(Ptr)) { - if (TreeEntry *Entry = getTreeEntry(Ptr)) { - // Find which lane we need to extract. - unsigned FoundLane = Entry->findLaneForValue(Ptr); - ExternalUses.push_back(ExternalUser(Ptr, ST, FoundLane)); - } - } - Value *V = propagateMetadata(ST, E->Scalars); E->VectorizedValue = V; @@ -11597,10 +11596,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { CallInst *CI = cast<CallInst>(VL0); setInsertPointAfterBundle(E); - Intrinsic::ID IID = Intrinsic::not_intrinsic; - if (Function *FI = CI->getCalledFunction()) - IID = FI->getIntrinsicID(); - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI); @@ -11611,18 +11606,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { SmallVector<Value *> OpVecs; SmallVector<Type *, 2> TysForDecl; // Add return type if intrinsic is overloaded on it. - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1)) + if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); for (unsigned I : seq<unsigned>(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. 
- if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, I)) { + if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { CallInst *CEI = cast<CallInst>(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I)) + if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) TysForDecl.push_back(ScalarArg->getType()); continue; } @@ -11634,7 +11629,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I)) + if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) TysForDecl.push_back(OpVec->getType()); } @@ -11654,18 +11649,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { CI->getOperandBundlesAsDefs(OpBundles); Value *V = Builder.CreateCall(CF, OpVecs, OpBundles); - // The scalar argument uses an in-tree scalar so we add the new vectorized - // call to ExternalUses list to make sure that an extract will be - // generated in the future. - if (isa_and_present<Instruction>(ScalarArg)) { - if (TreeEntry *Entry = getTreeEntry(ScalarArg)) { - // Find which lane we need to extract. - unsigned FoundLane = Entry->findLaneForValue(ScalarArg); - ExternalUses.push_back( - ExternalUser(ScalarArg, cast<User>(V), FoundLane)); - } - } - propagateIRFlags(V, E->Scalars, VL0); V = FinalShuffle(V, E, VecTy, IsSigned); @@ -11877,6 +11860,7 @@ Value *BoUpSLP::vectorizeTree( DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs; SmallDenseSet<Value *, 4> UsedInserts; DenseMap<Value *, Value *> VectorCasts; + SmallDenseSet<Value *, 4> ScalarsWithNullptrUser; // Extract all of the elements with the external uses. for (const auto &ExternalUse : ExternalUses) { Value *Scalar = ExternalUse.Scalar; @@ -11947,13 +11931,27 @@ Value *BoUpSLP::vectorizeTree( VectorToInsertElement.try_emplace(Vec, IE); return Vec; }; - // If User == nullptr, the Scalar is used as extra arg. Generate - // ExtractElement instruction and update the record for this scalar in - // ExternallyUsedValues. + // If User == nullptr, the Scalar remains as scalar in vectorized + // instructions or is used as extra arg. Generate ExtractElement instruction + // and update the record for this scalar in ExternallyUsedValues. 
if (!User) { - assert(ExternallyUsedValues.count(Scalar) && - "Scalar with nullptr as an external user must be registered in " - "ExternallyUsedValues map"); + if (!ScalarsWithNullptrUser.insert(Scalar).second) + continue; + assert((ExternallyUsedValues.count(Scalar) || + any_of(Scalar->users(), + [&](llvm::User *U) { + TreeEntry *UseEntry = getTreeEntry(U); + return UseEntry && + UseEntry->State == TreeEntry::Vectorize && + E->State == TreeEntry::Vectorize && + doesInTreeUserNeedToExtract( + Scalar, + cast<Instruction>(UseEntry->Scalars.front()), + TLI); + })) && + "Scalar with nullptr User must be registered in " + "ExternallyUsedValues map or remain as scalar in vectorized " + "instructions"); if (auto *VecI = dyn_cast<Instruction>(Vec)) { if (auto *PHI = dyn_cast<PHINode>(VecI)) Builder.SetInsertPoint(PHI->getParent(), @@ -16222,7 +16220,7 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { for (auto *V : Candidates) { auto *GEP = cast<GetElementPtrInst>(V); auto *GEPIdx = GEP->idx_begin()->get(); - assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx)); + assert(GEP->getNumIndices() == 1 && !isa<Constant>(GEPIdx)); Bundle[BundleIndex++] = GEPIdx; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h index 94cb76889813..7d33baac52c9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1357,6 +1357,36 @@ public: #endif }; +/// A recipe to compute the pointers for widened memory accesses of IndexTy for +/// all parts. If IsReverse is true, compute pointers for accessing the input in +/// reverse order per part. +class VPVectorPointerRecipe : public VPRecipeBase, public VPValue { + Type *IndexedTy; + bool IsReverse; + +public: + VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, + DebugLoc DL) + : VPRecipeBase(VPDef::VPVectorPointerSC, {Ptr}, DL), VPValue(this), + IndexedTy(IndexedTy), IsReverse(IsReverse) {} + + VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) + + void execute(VPTransformState &State) override; + + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A pure virtual base class for all recipes modeling header phis, including /// phis for first order recurrences, pointer inductions and reductions. 
The /// start value is the first operand of the recipe and the incoming value from diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 02e400d590be..76961629aece 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -498,16 +498,17 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { "DbgInfoIntrinsic should have been dropped during VPlan construction"); State.setDebugLocFrom(CI.getDebugLoc()); + bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic; FunctionType *VFTy = nullptr; if (Variant) VFTy = Variant->getFunctionType(); for (unsigned Part = 0; Part < State.UF; ++Part) { SmallVector<Type *, 2> TysForDecl; // Add return type if intrinsic is overloaded on it. - if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) { + if (UseIntrinsic && + isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) TysForDecl.push_back( VectorType::get(CI.getType()->getScalarType(), State.VF)); - } SmallVector<Value *, 4> Args; for (const auto &I : enumerate(operands())) { // Some intrinsics have a scalar argument - don't replace it with a @@ -516,18 +517,19 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { // e.g. linear parameters for pointers. Value *Arg; if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) || - (VectorIntrinsicID != Intrinsic::not_intrinsic && + (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))) Arg = State.get(I.value(), VPIteration(0, 0)); else Arg = State.get(I.value(), Part); - if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) + if (UseIntrinsic && + isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) TysForDecl.push_back(Arg->getType()); Args.push_back(Arg); } Function *VectorF; - if (VectorIntrinsicID != Intrinsic::not_intrinsic) { + if (UseIntrinsic) { // Use vector version of the intrinsic. Module *M = State.Builder.GetInsertBlock()->getModule(); VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); @@ -1209,6 +1211,59 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +void VPVectorPointerRecipe ::execute(VPTransformState &State) { + auto &Builder = State.Builder; + State.setDebugLocFrom(getDebugLoc()); + for (unsigned Part = 0; Part < State.UF; ++Part) { + // Calculate the pointer for the specific unroll-part. + Value *PartPtr = nullptr; + // Use i32 for the gep index type when the value is constant, + // or query DataLayout for a more suitable index type otherwise. + const DataLayout &DL = + Builder.GetInsertBlock()->getModule()->getDataLayout(); + Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0) + ? DL.getIndexType(IndexedTy->getPointerTo()) + : Builder.getInt32Ty(); + Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); + bool InBounds = false; + if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) + InBounds = GEP->isInBounds(); + if (IsReverse) { + // If the address is consecutive but reversed, then the + // wide store needs to start at the last vector element. 
+ // RunTimeVF = VScale * VF.getKnownMinValue() + // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() + Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); + // NumElt = -Part * RunTimeVF + Value *NumElt = Builder.CreateMul( + ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); + // LastLane = 1 - RunTimeVF + Value *LastLane = + Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); + PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); + PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds); + } else { + Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); + PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); + } + + State.set(this, PartPtr, Part); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = vector-pointer "; + if (IsReverse) + O << "(reverse) "; + + printOperands(O, SlotTracker); +} +#endif + void VPBlendRecipe::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); // We know that all PHIs in non-header blocks are converted into diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h index 116acad8e8f3..8cc98f4abf93 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -351,6 +351,7 @@ public: VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, + VPVectorPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, |
