|  |  |  |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-09 13:28:42 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-09 13:28:42 +0000 |
| commit | b1c73532ee8997fe5dfbeb7d223027bdf99758a0 (patch) | |
| tree | 7d6e51c294ab6719475d660217aa0c0ad0526292 /llvm/lib/Transforms/InstCombine | |
| parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff) | |
Diffstat (limited to 'llvm/lib/Transforms/InstCombine')
15 files changed, 2895 insertions, 1786 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 91ca44e0f11e..719a2678fc18 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -830,15 +830,15 @@ static Instruction *foldNoWrapAdd(BinaryOperator &Add,
   // (sext (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
   Constant *NarrowC;
   if (match(Op0, m_OneUse(m_SExt(m_NSWAdd(m_Value(X), m_Constant(NarrowC)))))) {
-    Constant *WideC = ConstantExpr::getSExt(NarrowC, Ty);
-    Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+    Value *WideC = Builder.CreateSExt(NarrowC, Ty);
+    Value *NewC = Builder.CreateAdd(WideC, Op1C);
     Value *WideX = Builder.CreateSExt(X, Ty);
     return BinaryOperator::CreateAdd(WideX, NewC);
   }
   // (zext (X +nuw NarrowC)) + C --> (zext X) + (zext(NarrowC) + C)
   if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_Constant(NarrowC)))))) {
-    Constant *WideC = ConstantExpr::getZExt(NarrowC, Ty);
-    Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+    Value *WideC = Builder.CreateZExt(NarrowC, Ty);
+    Value *NewC = Builder.CreateAdd(WideC, Op1C);
     Value *WideX = Builder.CreateZExt(X, Ty);
     return BinaryOperator::CreateAdd(WideX, NewC);
   }
@@ -903,8 +903,7 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {

   // (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add`
   Constant *Op01C;
-  if (match(Op0, m_Or(m_Value(X), m_ImmConstant(Op01C))) &&
-      haveNoCommonBitsSet(X, Op01C, DL, &AC, &Add, &DT))
+  if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C))))
     return BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C));

   // (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
@@ -995,6 +994,69 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
   return nullptr;
 }

+// match variations of a^2 + 2*a*b + b^2
+//
+// to reuse the code between the FP and Int versions, the instruction OpCodes
+// and constant types have been turned into template parameters.
+//
+// Mul2Rhs: The constant to perform the multiplicative equivalent of X*2 with;
+// should be `m_SpecificFP(2.0)` for FP and `m_SpecificInt(1)` for Int
+// (we're matching `X<<1` instead of `X*2` for Int)
+template <bool FP, typename Mul2Rhs>
+static bool matchesSquareSum(BinaryOperator &I, Mul2Rhs M2Rhs, Value *&A,
+                             Value *&B) {
+  constexpr unsigned MulOp = FP ? Instruction::FMul : Instruction::Mul;
+  constexpr unsigned AddOp = FP ? Instruction::FAdd : Instruction::Add;
+  constexpr unsigned Mul2Op = FP ? Instruction::FMul : Instruction::Shl;
+
+  // (a * a) + (((a * 2) + b) * b)
+  if (match(&I, m_c_BinOp(
+                    AddOp, m_OneUse(m_BinOp(MulOp, m_Value(A), m_Deferred(A))),
+                    m_OneUse(m_BinOp(
+                        MulOp,
+                        m_c_BinOp(AddOp, m_BinOp(Mul2Op, m_Deferred(A), M2Rhs),
+                                  m_Value(B)),
+                        m_Deferred(B))))))
+    return true;
+
+  // ((a * b) * 2) or ((a * 2) * b)
+  // +
+  // (a * a + b * b) or (b * b + a * a)
+  return match(
+      &I,
+      m_c_BinOp(AddOp,
+                m_CombineOr(
+                    m_OneUse(m_BinOp(
+                        Mul2Op, m_BinOp(MulOp, m_Value(A), m_Value(B)), M2Rhs)),
+                    m_OneUse(m_BinOp(MulOp, m_BinOp(Mul2Op, m_Value(A), M2Rhs),
+                                     m_Value(B)))),
+                m_OneUse(m_c_BinOp(
+                    AddOp, m_BinOp(MulOp, m_Deferred(A), m_Deferred(A)),
+                    m_BinOp(MulOp, m_Deferred(B), m_Deferred(B))))));
+}
+
+// Fold integer variations of a^2 + 2*a*b + b^2 -> (a + b)^2
+Instruction *InstCombinerImpl::foldSquareSumInt(BinaryOperator &I) {
+  Value *A, *B;
+  if (matchesSquareSum</*FP*/ false>(I, m_SpecificInt(1), A, B)) {
+    Value *AB = Builder.CreateAdd(A, B);
+    return BinaryOperator::CreateMul(AB, AB);
+  }
+  return nullptr;
+}
+
+// Fold floating point variations of a^2 + 2*a*b + b^2 -> (a + b)^2
+// Requires `nsz` and `reassoc`.
+Instruction *InstCombinerImpl::foldSquareSumFP(BinaryOperator &I) {
+  assert(I.hasAllowReassoc() && I.hasNoSignedZeros() && "Assumption mismatch");
+  Value *A, *B;
+  if (matchesSquareSum</*FP*/ true>(I, m_SpecificFP(2.0), A, B)) {
+    Value *AB = Builder.CreateFAddFMF(A, B, &I);
+    return BinaryOperator::CreateFMulFMF(AB, AB, &I);
+  }
+  return nullptr;
+}
+
 // Matches multiplication expression Op * C where C is a constant. Returns the
 // constant value in C and the other operand in Op. Returns true if such a
 // match is found.
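The integer version of this new fold needs no overflow preconditions because a^2 + 2ab + b^2 == (a+b)^2 is a ring identity, so it holds in wrapping two's-complement arithmetic as well (the FP version, by contrast, is guarded by `reassoc` and `nsz`). A standalone sanity check of that assumption — my sketch, not part of the patch:

```cpp
#include <cassert>
#include <cstdint>

// Exhaustively verify a*a + 2*a*b + b*b == (a+b)*(a+b) in 8-bit
// modular arithmetic, i.e. the identity foldSquareSumInt relies on
// even when the intermediate products wrap.
int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t lhs = uint8_t(a * a) + uint8_t(2 * a * b) + uint8_t(b * b);
      uint8_t rhs = uint8_t((a + b) * (a + b));
      assert(lhs == rhs);
    }
}
```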
@@ -1146,6 +1208,21 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
   return nullptr;
 }

+// Transform:
+//  (add A, (shl (neg B), Y))
+//      -> (sub A, (shl B, Y))
+static Instruction *combineAddSubWithShlAddSub(InstCombiner::BuilderTy &Builder,
+                                               const BinaryOperator &I) {
+  Value *A, *B, *Cnt;
+  if (match(&I,
+            m_c_Add(m_OneUse(m_Shl(m_OneUse(m_Neg(m_Value(B))), m_Value(Cnt))),
+                    m_Value(A)))) {
+    Value *NewShl = Builder.CreateShl(B, Cnt);
+    return BinaryOperator::CreateSub(A, NewShl);
+  }
+  return nullptr;
+}
+
 /// Try to reduce signed division by power-of-2 to an arithmetic shift right.
 static Instruction *foldAddToAshr(BinaryOperator &Add) {
   // Division must be by power-of-2, but not the minimum signed value.
@@ -1156,18 +1233,28 @@ static Instruction *foldAddToAshr(BinaryOperator &Add) {
     return nullptr;

   // Rounding is done by adding -1 if the dividend (X) is negative and has any
-  // low bits set. The canonical pattern for that is an "ugt" compare with SMIN:
-  // sext (icmp ugt (X & (DivC - 1)), SMIN)
-  const APInt *MaskC;
+  // low bits set. It recognizes two canonical patterns:
+  // 1. For an 'ugt' cmp with the signed minimum value (SMIN), the
+  //    pattern is: sext (icmp ugt (X & (DivC - 1)), SMIN).
+  // 2. For an 'eq' cmp, the pattern is:
+  //    sext (icmp eq (X & (SMIN + 1)), SMIN + 1).
+  // Note that, by the time we end up here, if possible, ugt has been
+  // canonicalized into eq.
+  const APInt *MaskC, *MaskCCmp;
   ICmpInst::Predicate Pred;
   if (!match(Add.getOperand(1),
              m_SExt(m_ICmp(Pred, m_And(m_Specific(X), m_APInt(MaskC)),
-                           m_SignMask()))) ||
-      Pred != ICmpInst::ICMP_UGT)
+                           m_APInt(MaskCCmp)))))
+    return nullptr;
+
+  if ((Pred != ICmpInst::ICMP_UGT || !MaskCCmp->isSignMask()) &&
+      (Pred != ICmpInst::ICMP_EQ || *MaskCCmp != *MaskC))
     return nullptr;

   APInt SMin = APInt::getSignedMinValue(Add.getType()->getScalarSizeInBits());
-  if (*MaskC != (SMin | (*DivC - 1)))
+  bool IsMaskValid = Pred == ICmpInst::ICMP_UGT
+                         ? (*MaskC == (SMin | (*DivC - 1)))
+                         : (*DivC == 2 && *MaskC == SMin + 1);
+  if (!IsMaskValid)
     return nullptr;

   // (X / DivC) + sext ((X & (SMin | (DivC - 1)) >u SMin) --> X >>s log2(DivC)
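The sext(icmp) term matched here is the -1 correction that reconciles signed division (rounds toward zero) with an arithmetic shift (rounds toward negative infinity). A self-contained illustration of the underlying arithmetic for DivC = 4 — my example, not patch code (C++20 guarantees `>>` on a negative signed value is an arithmetic shift):

```cpp
#include <cassert>
#include <cstdint>

// x/4 + (-1 if x is negative with low bits set) == x >> 2,
// the identity that lets foldAddToAshr replace the divide.
int main() {
  for (int32_t x = -1024; x <= 1024; ++x) {
    int32_t correction = (x < 0 && (x & 3) != 0) ? -1 : 0;
    assert(x / 4 + correction == (x >> 2));
  }
}
```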
@@ -1327,8 +1414,10 @@ static Instruction *foldBoxMultiply(BinaryOperator &I) {
   // ResLo = (CrossSum << HalfBits) + (YLo * XLo)
   Value *XLo, *YLo;
   Value *CrossSum;
+  // Require one-use on the multiply to avoid increasing the number of
+  // multiplications.
   if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
-                         m_Mul(m_Value(YLo), m_Value(XLo)))))
+                         m_OneUse(m_Mul(m_Value(YLo), m_Value(XLo))))))
     return nullptr;

   // XLo = X & HalfMask
@@ -1386,6 +1475,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
   if (Instruction *R = foldBinOpShiftWithShift(I))
     return R;

+  if (Instruction *R = combineAddSubWithShlAddSub(Builder, I))
+    return R;
+
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1))
@@ -1406,7 +1498,11 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
       return BinaryOperator::CreateNeg(Builder.CreateAdd(A, B));

     // -A + B --> B - A
-    return BinaryOperator::CreateSub(RHS, A);
+    auto *Sub = BinaryOperator::CreateSub(RHS, A);
+    auto *OB0 = cast<OverflowingBinaryOperator>(LHS);
+    Sub->setHasNoSignedWrap(I.hasNoSignedWrap() && OB0->hasNoSignedWrap());
+
+    return Sub;
   }

   // A + -B --> A - B
@@ -1485,8 +1581,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
     return replaceInstUsesWith(I, Constant::getNullValue(I.getType()));

   // A+B --> A|B iff A and B have no bits set in common.
-  if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
-    return BinaryOperator::CreateOr(LHS, RHS);
+  WithCache<const Value *> LHSCache(LHS), RHSCache(RHS);
+  if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ.getWithInstruction(&I)))
+    return BinaryOperator::CreateDisjointOr(LHS, RHS);

   if (Instruction *Ext = narrowMathIfNoOverflow(I))
     return Ext;
@@ -1576,15 +1673,33 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
                         m_c_UMin(m_Deferred(A), m_Deferred(B))))))
     return BinaryOperator::CreateWithCopiedFlags(Instruction::Add, A, B, &I);

+  // (~X) + (~Y) --> -2 - (X + Y)
+  {
+    // To ensure we can save instructions we need to ensure that we consume
+    // both LHS/RHS (i.e. they have a `not`).
+    bool ConsumesLHS, ConsumesRHS;
+    if (isFreeToInvert(LHS, LHS->hasOneUse(), ConsumesLHS) && ConsumesLHS &&
+        isFreeToInvert(RHS, RHS->hasOneUse(), ConsumesRHS) && ConsumesRHS) {
+      Value *NotLHS = getFreelyInverted(LHS, LHS->hasOneUse(), &Builder);
+      Value *NotRHS = getFreelyInverted(RHS, RHS->hasOneUse(), &Builder);
+      assert(NotLHS != nullptr && NotRHS != nullptr &&
+             "isFreeToInvert desynced with getFreelyInverted");
+      Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS);
+      return BinaryOperator::CreateSub(ConstantInt::get(RHS->getType(), -2),
+                                       LHSPlusRHS);
+    }
+  }
+
   // TODO(jingyue): Consider willNotOverflowSignedAdd and
   // willNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.
   bool Changed = false;
-  if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) {
+  if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHSCache, RHSCache, I)) {
     Changed = true;
     I.setHasNoSignedWrap(true);
   }
-  if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedAdd(LHS, RHS, I)) {
+  if (!I.hasNoUnsignedWrap() &&
+      willNotOverflowUnsignedAdd(LHSCache, RHSCache, I)) {
     Changed = true;
     I.setHasNoUnsignedWrap(true);
   }
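The new (~X) + (~Y) --> -2 - (X + Y) rewrite follows from the two's-complement identity ~a == -a - 1, so ~x + ~y == -(x + y) - 2. A minimal check of that identity in wrapping arithmetic — my sketch, not patch code:

```cpp
#include <cassert>
#include <cstdint>

// Exhaustive 8-bit check that ~x + ~y == -2 - (x + y) in wrapping
// two's-complement arithmetic, the identity behind the visitAdd fold.
int main() {
  for (uint32_t x = 0; x < 256; ++x)
    for (uint32_t y = 0; y < 256; ++y) {
      uint8_t lhs = uint8_t(~x) + uint8_t(~y);
      uint8_t rhs = uint8_t(-2) - uint8_t(x + y);
      assert(lhs == rhs);
    }
}
```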
@@ -1610,11 +1725,14 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
   // ctpop(A) + ctpop(B) => ctpop(A | B) if A and B have no bits set in common.
   if (match(LHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(A)))) &&
       match(RHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(B)))) &&
-      haveNoCommonBitsSet(A, B, DL, &AC, &I, &DT))
+      haveNoCommonBitsSet(A, B, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(
         I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
                                    {Builder.CreateOr(A, B)}));

+  if (Instruction *Res = foldSquareSumInt(I))
+    return Res;
+
   if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
     return Res;

@@ -1755,10 +1873,11 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
         // instcombined.
         if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
           if (IsValidPromotion(FPType, LHSIntVal->getType())) {
-            Constant *CI =
-                ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
+            Constant *CI = ConstantFoldCastOperand(Instruction::FPToSI, CFP,
+                                                   LHSIntVal->getType(), DL);
             if (LHSConv->hasOneUse() &&
-                ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+                ConstantFoldCastOperand(Instruction::SIToFP, CI, I.getType(),
+                                        DL) == CFP &&
                 willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
               // Insert the new integer add.
               Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv");
@@ -1794,6 +1913,9 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
   if (Instruction *F = factorizeFAddFSub(I, Builder))
     return F;

+  if (Instruction *F = foldSquareSumFP(I))
+    return F;
+
   // Try to fold fadd into start value of reduction intrinsic.
   if (match(&I, m_c_FAdd(m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(
                              m_AnyZeroFP(), m_Value(X))),
@@ -2017,14 +2139,16 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {

     // C-(X+C2) --> (C-C2)-X
     if (match(Op1, m_Add(m_Value(X), m_ImmConstant(C2)))) {
-      // C-C2 never overflows, and C-(X+C2), (X+C2) has NSW
-      // => (C-C2)-X can have NSW
+      // C-C2 never overflows, and C-(X+C2), (X+C2) has NSW/NUW
+      // => (C-C2)-X can have NSW/NUW
       bool WillNotSOV = willNotOverflowSignedSub(C, C2, I);
       BinaryOperator *Res =
           BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
       auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
       Res->setHasNoSignedWrap(I.hasNoSignedWrap() && OBO1->hasNoSignedWrap() &&
                               WillNotSOV);
+      Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap() &&
+                                OBO1->hasNoUnsignedWrap());
       return Res;
     }
   }
@@ -2058,7 +2182,9 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
              m_Select(m_Value(), m_Specific(Op1), m_Specific(&I))) ||
          match(UI, m_Select(m_Value(), m_Specific(&I), m_Specific(Op1)));
        })) {
-    if (Value *NegOp1 = Negator::Negate(IsNegation, Op1, *this))
+    if (Value *NegOp1 = Negator::Negate(IsNegation, /* IsNSW */ IsNegation &&
+                                                        I.hasNoSignedWrap(),
+                                        Op1, *this))
       return BinaryOperator::CreateAdd(NegOp1, Op0);
   }
   if (IsNegation)
@@ -2093,19 +2219,50 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {

   // ((X - Y) - Op1)  -->  X - (Y + Op1)
   if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y))))) {
-    Value *Add = Builder.CreateAdd(Y, Op1);
-    return BinaryOperator::CreateSub(X, Add);
+    OverflowingBinaryOperator *LHSSub = cast<OverflowingBinaryOperator>(Op0);
+    bool HasNUW = I.hasNoUnsignedWrap() && LHSSub->hasNoUnsignedWrap();
+    bool HasNSW = HasNUW && I.hasNoSignedWrap() && LHSSub->hasNoSignedWrap();
+    Value *Add = Builder.CreateAdd(Y, Op1, "", /* HasNUW */ HasNUW,
+                                   /* HasNSW */ HasNSW);
+    BinaryOperator *Sub = BinaryOperator::CreateSub(X, Add);
+    Sub->setHasNoUnsignedWrap(HasNUW);
+    Sub->setHasNoSignedWrap(HasNSW);
+    return Sub;
+  }
+
+  {
+    // (X + Z) - (Y + Z) --> (X - Y)
+    // This is done in other passes, but we want to be able to consume this
+    // pattern in InstCombine so we can generate it without creating infinite
+    // loops.
+    if (match(Op0, m_Add(m_Value(X), m_Value(Z))) &&
+        match(Op1, m_c_Add(m_Value(Y), m_Specific(Z))))
+      return BinaryOperator::CreateSub(X, Y);
+
+    // (X + C0) - (Y + C1) --> (X - Y) + (C0 - C1)
+    Constant *CX, *CY;
+    if (match(Op0, m_OneUse(m_Add(m_Value(X), m_ImmConstant(CX)))) &&
+        match(Op1, m_OneUse(m_Add(m_Value(Y), m_ImmConstant(CY))))) {
+      Value *OpsSub = Builder.CreateSub(X, Y);
+      Constant *ConstsSub = ConstantExpr::getSub(CX, CY);
+      return BinaryOperator::CreateAdd(OpsSub, ConstsSub);
+    }
   }

   // (~X) - (~Y) --> Y - X
-  // This is placed after the other reassociations and explicitly excludes a
-  // sub-of-sub pattern to avoid infinite looping.
-  if (isFreeToInvert(Op0, Op0->hasOneUse()) &&
-      isFreeToInvert(Op1, Op1->hasOneUse()) &&
-      !match(Op0, m_Sub(m_ImmConstant(), m_Value()))) {
-    Value *NotOp0 = Builder.CreateNot(Op0);
-    Value *NotOp1 = Builder.CreateNot(Op1);
-    return BinaryOperator::CreateSub(NotOp1, NotOp0);
+  {
+    // Need to ensure we can consume at least one of the `not` instructions,
+    // otherwise this can infinite loop.
+    bool ConsumesOp0, ConsumesOp1;
+    if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) &&
+        isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) &&
+        (ConsumesOp0 || ConsumesOp1)) {
+      Value *NotOp0 = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder);
+      Value *NotOp1 = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder);
+      assert(NotOp0 != nullptr && NotOp1 != nullptr &&
+             "isFreeToInvert desynced with getFreelyInverted");
+      return BinaryOperator::CreateSub(NotOp1, NotOp0);
+    }
   }

   auto m_AddRdx = [](Value *&Vec) {
@@ -2520,18 +2677,33 @@ static Instruction *foldFNegIntoConstant(Instruction &I, const DataLayout &DL) {
   return nullptr;
 }

-static Instruction *hoistFNegAboveFMulFDiv(Instruction &I,
-                                           InstCombiner::BuilderTy &Builder) {
-  Value *FNeg;
-  if (!match(&I, m_FNeg(m_Value(FNeg))))
-    return nullptr;
-
+Instruction *InstCombinerImpl::hoistFNegAboveFMulFDiv(Value *FNegOp,
+                                                      Instruction &FMFSource) {
   Value *X, *Y;
-  if (match(FNeg, m_OneUse(m_FMul(m_Value(X), m_Value(Y)))))
-    return BinaryOperator::CreateFMulFMF(Builder.CreateFNegFMF(X, &I), Y, &I);
+  if (match(FNegOp, m_FMul(m_Value(X), m_Value(Y)))) {
+    return cast<Instruction>(Builder.CreateFMulFMF(
+        Builder.CreateFNegFMF(X, &FMFSource), Y, &FMFSource));
+  }
+
+  if (match(FNegOp, m_FDiv(m_Value(X), m_Value(Y)))) {
+    return cast<Instruction>(Builder.CreateFDivFMF(
+        Builder.CreateFNegFMF(X, &FMFSource), Y, &FMFSource));
+  }
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(FNegOp)) {
+    // Make sure to preserve flags and metadata on the call.
+    if (II->getIntrinsicID() == Intrinsic::ldexp) {
+      FastMathFlags FMF = FMFSource.getFastMathFlags() | II->getFastMathFlags();
+      IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+      Builder.setFastMathFlags(FMF);

-  if (match(FNeg, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))))
-    return BinaryOperator::CreateFDivFMF(Builder.CreateFNegFMF(X, &I), Y, &I);
+      CallInst *New = Builder.CreateCall(
+          II->getCalledFunction(),
+          {Builder.CreateFNeg(II->getArgOperand(0)), II->getArgOperand(1)});
+      New->copyMetadata(*II);
+      return New;
+    }
+  }

   return nullptr;
 }
@@ -2553,13 +2725,13 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
       match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
     return BinaryOperator::CreateFSubFMF(Y, X, &I);

-  if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
-    return R;
-
   Value *OneUse;
   if (!match(Op, m_OneUse(m_Value(OneUse))))
     return nullptr;

+  if (Instruction *R = hoistFNegAboveFMulFDiv(OneUse, I))
+    return replaceInstUsesWith(I, R);
+
   // Try to eliminate fneg if at least 1 arm of the select is negated.
   Value *Cond;
   if (match(OneUse, m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))) {
@@ -2569,8 +2741,7 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
     auto propagateSelectFMF = [&](SelectInst *S, bool CommonOperand) {
       S->copyFastMathFlags(&I);
       if (auto *OldSel = dyn_cast<SelectInst>(Op)) {
-        FastMathFlags FMF = I.getFastMathFlags();
-        FMF |= OldSel->getFastMathFlags();
+        FastMathFlags FMF = I.getFastMathFlags() | OldSel->getFastMathFlags();
         S->setFastMathFlags(FMF);
         if (!OldSel->hasNoSignedZeros() && !CommonOperand &&
             !isGuaranteedNotToBeUndefOrPoison(OldSel->getCondition()))
@@ -2638,9 +2809,6 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
   if (Instruction *X = foldFNegIntoConstant(I, DL))
     return X;

-  if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
-    return R;
-
   Value *X, *Y;
   Constant *C;

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 8a1fb6b7f17e..6002f599ca71 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1099,39 +1099,6 @@ static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
       return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
   }

-  Value *Base, *Offset;
-  if (!match(ZeroCmpOp, m_Sub(m_Value(Base), m_Value(Offset))))
-    return nullptr;
-
-  if (!match(UnsignedICmp,
-             m_c_ICmp(UnsignedPred, m_Specific(Base), m_Specific(Offset))) ||
-      !ICmpInst::isUnsigned(UnsignedPred))
-    return nullptr;
-
-  // Base >=/> Offset && (Base - Offset) != 0  <-->  Base > Offset
-  // (no overflow and not null)
-  if ((UnsignedPred == ICmpInst::ICMP_UGE ||
-       UnsignedPred == ICmpInst::ICMP_UGT) &&
-      EqPred == ICmpInst::ICMP_NE && IsAnd)
-    return Builder.CreateICmpUGT(Base, Offset);
-
-  // Base <=/< Offset || (Base - Offset) == 0  <-->  Base <= Offset
-  // (overflow or null)
-  if ((UnsignedPred == ICmpInst::ICMP_ULE ||
-       UnsignedPred == ICmpInst::ICMP_ULT) &&
-      EqPred == ICmpInst::ICMP_EQ && !IsAnd)
-    return Builder.CreateICmpULE(Base, Offset);
-
-  // Base <= Offset && (Base - Offset) != 0  -->  Base < Offset
-  if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
-      IsAnd)
-    return Builder.CreateICmpULT(Base, Offset);
-
-  // Base > Offset || (Base - Offset) == 0  -->  Base >= Offset
-  if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
-      !IsAnd)
-    return Builder.CreateICmpUGE(Base, Offset);
-
   return nullptr;
 }

@@ -1179,13 +1146,40 @@ Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
     return nullptr;

   CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
-  if (Cmp0->getPredicate() != Pred || Cmp1->getPredicate() != Pred)
-    return nullptr;
+  auto GetMatchPart = [&](ICmpInst *Cmp,
+                          unsigned OpNo) -> std::optional<IntPart> {
+    if (Pred == Cmp->getPredicate())
+      return matchIntPart(Cmp->getOperand(OpNo));
+
+    const APInt *C;
+    // (icmp eq (lshr x, C), (lshr y, C)) gets optimized to:
+    // (icmp ult (xor x, y), 1 << C) so also look for that.
+    if (Pred == CmpInst::ICMP_EQ && Cmp->getPredicate() == CmpInst::ICMP_ULT) {
+      if (!match(Cmp->getOperand(1), m_Power2(C)) ||
+          !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value())))
+        return std::nullopt;
+    }

-  std::optional<IntPart> L0 = matchIntPart(Cmp0->getOperand(0));
-  std::optional<IntPart> R0 = matchIntPart(Cmp0->getOperand(1));
-  std::optional<IntPart> L1 = matchIntPart(Cmp1->getOperand(0));
-  std::optional<IntPart> R1 = matchIntPart(Cmp1->getOperand(1));
+    // (icmp ne (lshr x, C), (lshr y, C)) gets optimized to:
+    // (icmp ugt (xor x, y), (1 << C) - 1) so also look for that.
+    else if (Pred == CmpInst::ICMP_NE &&
+             Cmp->getPredicate() == CmpInst::ICMP_UGT) {
+      if (!match(Cmp->getOperand(1), m_LowBitMask(C)) ||
+          !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value())))
+        return std::nullopt;
+    } else {
+      return std::nullopt;
+    }
+
+    unsigned From = Pred == CmpInst::ICMP_NE ? C->popcount() : C->countr_zero();
+    Instruction *I = cast<Instruction>(Cmp->getOperand(0));
+    return {{I->getOperand(OpNo), From, C->getBitWidth() - From}};
+  };
+
+  std::optional<IntPart> L0 = GetMatchPart(Cmp0, 0);
+  std::optional<IntPart> R0 = GetMatchPart(Cmp0, 1);
+  std::optional<IntPart> L1 = GetMatchPart(Cmp1, 0);
+  std::optional<IntPart> R1 = GetMatchPart(Cmp1, 1);
   if (!L0 || !R0 || !L1 || !R1)
     return nullptr;

@@ -1616,7 +1610,7 @@ static Instruction *reassociateFCmps(BinaryOperator &BO,
 /// (~A & ~B) == (~(A | B))
 /// (~A | ~B) == (~(A & B))
 static Instruction *matchDeMorgansLaws(BinaryOperator &I,
-                                       InstCombiner::BuilderTy &Builder) {
+                                       InstCombiner &IC) {
   const Instruction::BinaryOps Opcode = I.getOpcode();
   assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
          "Trying to match De Morgan's Laws with something other than and/or");
@@ -1629,10 +1623,10 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I,

   Value *A, *B;
   if (match(Op0, m_OneUse(m_Not(m_Value(A)))) &&
       match(Op1, m_OneUse(m_Not(m_Value(B)))) &&
-      !InstCombiner::isFreeToInvert(A, A->hasOneUse()) &&
-      !InstCombiner::isFreeToInvert(B, B->hasOneUse())) {
+      !IC.isFreeToInvert(A, A->hasOneUse()) &&
+      !IC.isFreeToInvert(B, B->hasOneUse())) {
     Value *AndOr =
-        Builder.CreateBinOp(FlippedOpcode, A, B, I.getName() + ".demorgan");
+        IC.Builder.CreateBinOp(FlippedOpcode, A, B, I.getName() + ".demorgan");
     return BinaryOperator::CreateNot(AndOr);
   }

@@ -1644,8 +1638,8 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I,
   Value *C;
   if (match(Op0, m_OneUse(m_c_BinOp(Opcode, m_Value(A), m_Not(m_Value(B))))) &&
       match(Op1, m_Not(m_Value(C)))) {
-    Value *FlippedBO = Builder.CreateBinOp(FlippedOpcode, B, C);
-    return BinaryOperator::Create(Opcode, A, Builder.CreateNot(FlippedBO));
+    Value *FlippedBO = IC.Builder.CreateBinOp(FlippedOpcode, B, C);
+    return BinaryOperator::Create(Opcode, A, IC.Builder.CreateNot(FlippedBO));
   }

   return nullptr;
@@ -1669,7 +1663,7 @@ bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) {

 /// Fold {and,or,xor} (cast X), C.
 static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
-                                          InstCombiner::BuilderTy &Builder) {
+                                          InstCombinerImpl &IC) {
   Constant *C = dyn_cast<Constant>(Logic.getOperand(1));
   if (!C)
     return nullptr;
@@ -1684,21 +1678,17 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
   // instruction may be cheaper (particularly in the case of vectors).
   Value *X;
   if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) {
-    Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
-    Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy);
-    if (ZextTruncC == C) {
+    if (Constant *TruncC = IC.getLosslessUnsignedTrunc(C, SrcTy)) {
       // LogicOpc (zext X), C --> zext (LogicOpc X, C)
-      Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC);
+      Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC);
       return new ZExtInst(NewOp, DestTy);
     }
   }

   if (match(Cast, m_OneUse(m_SExt(m_Value(X))))) {
-    Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
-    Constant *SextTruncC = ConstantExpr::getSExt(TruncC, DestTy);
-    if (SextTruncC == C) {
+    if (Constant *TruncC = IC.getLosslessSignedTrunc(C, SrcTy)) {
       // LogicOpc (sext X), C --> sext (LogicOpc X, C)
-      Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC);
+      Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC);
       return new SExtInst(NewOp, DestTy);
     }
   }
@@ -1756,7 +1746,7 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
   if (!SrcTy->isIntOrIntVectorTy())
     return nullptr;

-  if (Instruction *Ret = foldLogicCastConstant(I, Cast0, Builder))
+  if (Instruction *Ret = foldLogicCastConstant(I, Cast0, *this))
     return Ret;

   CastInst *Cast1 = dyn_cast<CastInst>(Op1);
@@ -1802,29 +1792,6 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
     return CastInst::Create(CastOpcode, NewOp, DestTy);
   }

-  // For now, only 'and'/'or' have optimizations after this.
-  if (LogicOpc == Instruction::Xor)
-    return nullptr;
-
-  // If this is logic(cast(icmp), cast(icmp)), try to fold this even if the
-  // cast is otherwise not optimizable. This happens for vector sexts.
-  ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src);
-  ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src);
-  if (ICmp0 && ICmp1) {
-    if (Value *Res =
-            foldAndOrOfICmps(ICmp0, ICmp1, I, LogicOpc == Instruction::And))
-      return CastInst::Create(CastOpcode, Res, DestTy);
-    return nullptr;
-  }
-
-  // If this is logic(cast(fcmp), cast(fcmp)), try to fold this even if the
-  // cast is otherwise not optimizable. This happens for vector sexts.
-  FCmpInst *FCmp0 = dyn_cast<FCmpInst>(Cast0Src);
-  FCmpInst *FCmp1 = dyn_cast<FCmpInst>(Cast1Src);
-  if (FCmp0 && FCmp1)
-    if (Value *R = foldLogicOfFCmps(FCmp0, FCmp1, LogicOpc == Instruction::And))
-      return CastInst::Create(CastOpcode, R, DestTy);
-
   return nullptr;
 }

@@ -2160,10 +2127,10 @@ Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) {
   Constant *ShiftedC1, *ShiftedC2, *AddC;
   Type *Ty = I.getType();
   unsigned BitWidth = Ty->getScalarSizeInBits();
-  if (!match(&I,
-             m_c_BinOp(m_Shift(m_ImmConstant(ShiftedC1), m_Value(ShAmt)),
-                       m_Shift(m_ImmConstant(ShiftedC2),
-                               m_Add(m_Deferred(ShAmt), m_ImmConstant(AddC))))))
+  if (!match(&I, m_c_BinOp(m_Shift(m_ImmConstant(ShiftedC1), m_Value(ShAmt)),
+                           m_Shift(m_ImmConstant(ShiftedC2),
+                                   m_AddLike(m_Deferred(ShAmt),
+                                             m_ImmConstant(AddC))))))
     return nullptr;

   // Make sure the add constant is a valid shift amount.
@@ -2254,6 +2221,14 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
     return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
   }

+  // Canonicalize:
+  // (X +/- Y) & Y --> ~X & Y when Y is a power of 2.
+  if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr(
+                                        m_c_Add(m_Value(X), m_Deferred(Y)),
+                                        m_Sub(m_Value(X), m_Deferred(Y)))))) &&
+      isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, /*Depth*/ 0, &I))
+    return BinaryOperator::CreateAnd(Builder.CreateNot(X), Y);
+
   const APInt *C;
   if (match(Op1, m_APInt(C))) {
     const APInt *XorC;
@@ -2300,13 +2275,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {

     const APInt *AddC;
     if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) {
-      // If we add zeros to every bit below a mask, the add has no effect:
-      // (X + AddC) & LowMaskC --> X & LowMaskC
-      unsigned Ctlz = C->countl_zero();
-      APInt LowMask(APInt::getLowBitsSet(Width, Width - Ctlz));
-      if ((*AddC & LowMask).isZero())
-        return BinaryOperator::CreateAnd(X, Op1);
-
       // If we are masking the result of the add down to exactly one bit and
       // the constant we are adding has no bits set below that bit, then the
       // add is flipping a single bit. Example:
@@ -2455,6 +2423,28 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
     }
   }

+  // If we are clearing the sign bit of a floating-point value, convert this to
+  // fabs, then cast back to integer.
+  //
+  // This is a generous interpretation for noimplicitfloat, this is not a true
+  // floating-point operation.
+  //
+  // Assumes any IEEE-represented type has the sign bit in the high bit.
+  // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt
+  Value *CastOp;
+  if (match(Op0, m_BitCast(m_Value(CastOp))) &&
+      match(Op1, m_MaxSignedValue()) &&
+      !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
+          Attribute::NoImplicitFloat)) {
+    Type *EltTy = CastOp->getType()->getScalarType();
+    if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
+        EltTy->getPrimitiveSizeInBits() ==
+            I.getType()->getScalarType()->getPrimitiveSizeInBits()) {
+      Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
+      return new BitCastInst(FAbs, I.getType());
+    }
+  }
+
   if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))),
                       m_SignMask())) &&
       match(Y, m_SpecificInt_ICMP(
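The fabs fold above relies on an IEEE-754 layout fact: masking off the high (sign) bit of a float's bit pattern yields the bit pattern of fabs of that float. A small check of that assumption — my sketch, requires C++20 for std::bit_cast:

```cpp
#include <bit>
#include <cassert>
#include <cmath>
#include <cstdint>

// Clearing the sign bit (AND with the max signed value 0x7fffffff,
// the m_MaxSignedValue pattern) is bitwise-identical to fabs(x),
// including for -0.0 and negative infinity.
int main() {
  for (float x : {-1.5f, -0.0f, 1.5f, -INFINITY}) {
    uint32_t bits = std::bit_cast<uint32_t>(x) & 0x7fffffffu;
    assert(std::bit_cast<float>(bits) == std::fabs(x));
  }
}
```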
@@ -2479,21 +2469,21 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {

   if (I.getType()->isIntOrIntVectorTy(1)) {
     if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
-      if (auto *I =
+      if (auto *R =
               foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ true))
-        return I;
+        return R;
     }
     if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
-      if (auto *I =
+      if (auto *R =
               foldAndOrOfSelectUsingImpliedCond(Op0, *SI1, /* IsAnd */ true))
-        return I;
+        return R;
     }
   }

   if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
     return FoldedLogic;

-  if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+  if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this))
     return DeMorgan;

   {
@@ -2513,16 +2503,24 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
       return BinaryOperator::CreateAnd(Op1, B);

     // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
-    if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
-      if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
-        if (Op1->hasOneUse() || isFreeToInvert(C, C->hasOneUse()))
-          return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C));
+    if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+        match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) {
+      Value *NotC = Op1->hasOneUse()
+                        ? Builder.CreateNot(C)
+                        : getFreelyInverted(C, C->hasOneUse(), &Builder);
+      if (NotC != nullptr)
+        return BinaryOperator::CreateAnd(Op0, NotC);
+    }

     // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
-    if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
-      if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
-        if (Op0->hasOneUse() || isFreeToInvert(C, C->hasOneUse()))
-          return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C));
+    if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))) &&
+        match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) {
+      Value *NotC = Op0->hasOneUse()
+                        ? Builder.CreateNot(C)
+                        : getFreelyInverted(C, C->hasOneUse(), &Builder);
+      if (NotC != nullptr)
+        return BinaryOperator::CreateAnd(Op1, NotC);
+    }

     // (A | B) & (~A ^ B) -> A & B
     // (A | B) & (B ^ ~A) -> A & B
@@ -2621,23 +2619,34 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   // with binop identity constant. But creating a select with non-constant
   // arm may not be reversible due to poison semantics. Is that a good
   // canonicalization?
-  Value *A;
-  if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
-      A->getType()->isIntOrIntVectorTy(1))
-    return SelectInst::Create(A, Op1, Constant::getNullValue(Ty));
-  if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
+  Value *A, *B;
+  if (match(&I, m_c_And(m_OneUse(m_SExt(m_Value(A))), m_Value(B))) &&
       A->getType()->isIntOrIntVectorTy(1))
-    return SelectInst::Create(A, Op0, Constant::getNullValue(Ty));
+    return SelectInst::Create(A, B, Constant::getNullValue(Ty));

   // Similarly, a 'not' of the bool translates to a swap of the select arms:
-  // ~sext(A) & Op1 --> A ? 0 : Op1
-  // Op0 & ~sext(A) --> A ? 0 : Op0
-  if (match(Op0, m_Not(m_SExt(m_Value(A)))) &&
+  // ~sext(A) & B / B & ~sext(A) --> A ? 0 : B
+  if (match(&I, m_c_And(m_Not(m_SExt(m_Value(A))), m_Value(B))) &&
       A->getType()->isIntOrIntVectorTy(1))
-    return SelectInst::Create(A, Constant::getNullValue(Ty), Op1);
-  if (match(Op1, m_Not(m_SExt(m_Value(A)))) &&
+    return SelectInst::Create(A, Constant::getNullValue(Ty), B);
+
+  // and(zext(A), B) -> A ? (B & 1) : 0
+  if (match(&I, m_c_And(m_OneUse(m_ZExt(m_Value(A))), m_Value(B))) &&
       A->getType()->isIntOrIntVectorTy(1))
-    return SelectInst::Create(A, Constant::getNullValue(Ty), Op0);
+    return SelectInst::Create(A, Builder.CreateAnd(B, ConstantInt::get(Ty, 1)),
+                              Constant::getNullValue(Ty));
+
+  // (-1 + A) & B --> A ? 0 : B where A is 0/1.
+  if (match(&I, m_c_And(m_OneUse(m_Add(m_ZExtOrSelf(m_Value(A)), m_AllOnes())),
+                        m_Value(B)))) {
+    if (A->getType()->isIntOrIntVectorTy(1))
+      return SelectInst::Create(A, Constant::getNullValue(Ty), B);
+    if (computeKnownBits(A, /* Depth */ 0, &I).countMaxActiveBits() <= 1) {
+      return SelectInst::Create(
+          Builder.CreateICmpEQ(A, Constant::getNullValue(A->getType())), B,
+          Constant::getNullValue(Ty));
+    }
+  }

   // (iN X s>> (N-1)) & Y --> (X s< 0) ? Y : 0 -- with optional sext
   if (match(&I, m_c_And(m_OneUse(m_SExtOrSelf(
@@ -2698,105 +2707,178 @@ Instruction *InstCombinerImpl::matchBSwapOrBitReverse(Instruction &I,
 }

 /// Match UB-safe variants of the funnel shift intrinsic.
-static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
+static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC,
+                                     const DominatorTree &DT) {
   // TODO: Can we reduce the code duplication between this and the related
   // rotate matching code under visitSelect and visitTrunc?
   unsigned Width = Or.getType()->getScalarSizeInBits();

+  Instruction *Or0, *Or1;
+  if (!match(Or.getOperand(0), m_Instruction(Or0)) ||
+      !match(Or.getOperand(1), m_Instruction(Or1)))
+    return nullptr;
+
+  bool IsFshl = true; // Sub on LSHR.
+  SmallVector<Value *, 3> FShiftArgs;
+
   // First, find an or'd pair of opposite shifts:
   // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
-  BinaryOperator *Or0, *Or1;
-  if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
-      !match(Or.getOperand(1), m_BinOp(Or1)))
-    return nullptr;
+  if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
+    Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+    if (!match(Or0,
+               m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+        !match(Or1,
+               m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
+        Or0->getOpcode() == Or1->getOpcode())
+      return nullptr;

-  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
-  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
-      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
-      Or0->getOpcode() == Or1->getOpcode())
-    return nullptr;
+    // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
+    if (Or0->getOpcode() == BinaryOperator::LShr) {
+      std::swap(Or0, Or1);
+      std::swap(ShVal0, ShVal1);
+      std::swap(ShAmt0, ShAmt1);
+    }
+    assert(Or0->getOpcode() == BinaryOperator::Shl &&
+           Or1->getOpcode() == BinaryOperator::LShr &&
+           "Illegal or(shift,shift) pair");

-  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
-  if (Or0->getOpcode() == BinaryOperator::LShr) {
-    std::swap(Or0, Or1);
-    std::swap(ShVal0, ShVal1);
-    std::swap(ShAmt0, ShAmt1);
-  }
-  assert(Or0->getOpcode() == BinaryOperator::Shl &&
-         Or1->getOpcode() == BinaryOperator::LShr &&
-         "Illegal or(shift,shift) pair");
+    // Match the shift amount operands for a funnel shift pattern. This always
+    // matches a subtraction on the R operand.
+    auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
+      // Check for constant shift amounts that sum to the bitwidth.
+      const APInt *LI, *RI;
+      if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
+        if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
+          return ConstantInt::get(L->getType(), *LI);
+
+      Constant *LC, *RC;
+      if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
+          match(L,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(R,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
+        return ConstantExpr::mergeUndefsWith(LC, RC);
+
+      // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
+      // We limit this to X < Width in case the backend re-expands the
+      // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
+      // might remove it after this fold). This still doesn't guarantee that the
+      // final codegen will match this original pattern.
+      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+        KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+        return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+      }
-    const APInt *LI, *RI;
-    if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
-      if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
-        return ConstantInt::get(L->getType(), *LI);
-
-    Constant *LC, *RC;
-    if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
-        match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
-      return ConstantExpr::mergeUndefsWith(LC, RC);
-
-    // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
-    // We limit this to X < Width in case the backend re-expands the intrinsic,
-    // and has to reintroduce a shift modulo operation (InstCombine might remove
-    // it after this fold). This still doesn't guarantee that the final codegen
-    // will match this original pattern.
-    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
-      KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
-      return KnownL.getMaxValue().ult(Width) ? L : nullptr;
-    }

-    // For non-constant cases, the following patterns currently only work for
-    // rotation patterns.
-    // TODO: Add general funnel-shift compatible patterns.
-    if (ShVal0 != ShVal1)
-      return nullptr;
+      // For non-constant cases, the following patterns currently only work for
+      // rotation patterns.
+      // TODO: Add general funnel-shift compatible patterns.
+      if (ShVal0 != ShVal1)
+        return nullptr;

-    // For non-constant cases we don't support non-pow2 shift masks.
-    // TODO: Is it worth matching urem as well?
+      // For non-constant cases we don't support non-pow2 shift masks.
+      // TODO: Is it worth matching urem as well?
-    if (!isPowerOf2_32(Width))
-      return nullptr;
+      if (!isPowerOf2_32(Width))
+        return nullptr;

-    // The shift amount may be masked with negation:
-    // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
-    Value *X;
-    unsigned Mask = Width - 1;
-    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
-        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
-      return X;
+      // The shift amount may be masked with negation:
+      // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+      Value *X;
+      unsigned Mask = Width - 1;
+      if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+          match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+        return X;

-    // Similar to above, but the shift amount may be extended after masking,
-    // so return the extended value as the parameter for the intrinsic.
-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
-                       m_SpecificInt(Mask))))
-      return L;
+      // Similar to above, but the shift amount may be extended after masking,
+      // so return the extended value as the parameter for the intrinsic.
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R,
+                m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
+                      m_SpecificInt(Mask))))
+        return L;

-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
-      return L;
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+        return L;

-    return nullptr;
-  };
+      return nullptr;
+    };

-  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
-  bool IsFshl = true; // Sub on LSHR.
-  if (!ShAmt) {
-    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
-    IsFshl = false; // Sub on SHL.
+    Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+    if (!ShAmt) {
+      ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+      IsFshl = false; // Sub on SHL.
+    }
+    if (!ShAmt)
+      return nullptr;
+
+    FShiftArgs = {ShVal0, ShVal1, ShAmt};
+  } else if (isa<ZExtInst>(Or0) || isa<ZExtInst>(Or1)) {
+    // If there are two 'or' instructions concat variables in opposite order:
+    //
+    // Slot1 and Slot2 are all zero bits.
+    // | Slot1 | Low | Slot2 | High |
+    // LowHigh = or (shl (zext Low), ZextLowShlAmt), (zext High)
+    // | Slot2 | High | Slot1 | Low |
+    // HighLow = or (shl (zext High), ZextHighShlAmt), (zext Low)
+    //
+    // the latter 'or' can be safely converted to
+    // -> HighLow = fshl LowHigh, LowHigh, ZextHighShlAmt
+    // if ZextLowShlAmt + ZextHighShlAmt == Width.
+    if (!isa<ZExtInst>(Or1))
+      std::swap(Or0, Or1);
+
+    Value *High, *ZextHigh, *Low;
+    const APInt *ZextHighShlAmt;
+    if (!match(Or0,
+               m_OneUse(m_Shl(m_Value(ZextHigh), m_APInt(ZextHighShlAmt)))))
+      return nullptr;
+
+    if (!match(Or1, m_ZExt(m_Value(Low))) ||
+        !match(ZextHigh, m_ZExt(m_Value(High))))
+      return nullptr;
+
+    unsigned HighSize = High->getType()->getScalarSizeInBits();
+    unsigned LowSize = Low->getType()->getScalarSizeInBits();
+    // Make sure High does not overlap with Low and most significant bits of
+    // High aren't shifted out.
+    if (ZextHighShlAmt->ult(LowSize) || ZextHighShlAmt->ugt(Width - HighSize))
+      return nullptr;
+
+    for (User *U : ZextHigh->users()) {
+      Value *X, *Y;
+      if (!match(U, m_Or(m_Value(X), m_Value(Y))))
+        continue;
+
+      if (!isa<ZExtInst>(Y))
+        std::swap(X, Y);
+
+      const APInt *ZextLowShlAmt;
+      if (!match(X, m_Shl(m_Specific(Or1), m_APInt(ZextLowShlAmt))) ||
+          !match(Y, m_Specific(ZextHigh)) || !DT.dominates(U, &Or))
+        continue;
+
+      // HighLow is good concat. If sum of two shifts amount equals to Width,
+      // LowHigh must also be a good concat.
+      if (*ZextLowShlAmt + *ZextHighShlAmt != Width)
+        continue;
+
+      // Low must not overlap with High and most significant bits of Low must
+      // not be shifted out.
+      assert(ZextLowShlAmt->uge(HighSize) &&
+             ZextLowShlAmt->ule(Width - LowSize) && "Invalid concat");
+
+      FShiftArgs = {U, U, ConstantInt::get(Or0->getType(), *ZextHighShlAmt)};
+      break;
+    }
   }
-  if (!ShAmt)
+
+  if (FShiftArgs.empty())
     return nullptr;

   Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
   Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
-  return CallInst::Create(F, {ShVal0, ShVal1, ShAmt});
+  return CallInst::Create(F, FShiftArgs);
 }

 /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
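The rotation shape this function recognizes in its simplest constant form is or(shl(x, n), lshr(x, Width - n)), which is exactly a rotate-left and becomes an llvm.fshl call. A quick check of that equivalence — my sketch, requires C++20 for std::rotl:

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

// or(shl(x, n), lshr(x, 32 - n)) is rotate-left for 0 < n < 32,
// the pattern matchFunnelShift turns into llvm.fshl(x, x, n).
int main() {
  const uint32_t x = 0xDEADBEEF;
  for (unsigned n = 1; n < 32; ++n) {
    uint32_t manual = (x << n) | (x >> (32 - n));
    assert(manual == std::rotl(x, int(n)));
  }
}
```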
@@ -3272,14 +3354,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1)) {
     if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
-      if (auto *I =
+      if (auto *R =
               foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ false))
-        return I;
+        return R;
     }
     if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
-      if (auto *I =
+      if (auto *R =
               foldAndOrOfSelectUsingImpliedCond(Op0, *SI1, /* IsAnd */ false))
-        return I;
+        return R;
     }
   }

@@ -3290,7 +3372,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
                                           /*MatchBitReversals*/ true))
     return BitOp;

-  if (Instruction *Funnel = matchFunnelShift(I, *this))
+  if (Instruction *Funnel = matchFunnelShift(I, *this, DT))
     return Funnel;

   if (Instruction *Concat = matchOrConcat(I, Builder))
@@ -3311,9 +3393,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {

   // If the operands have no common bits set:
   // or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1)
-  if (match(&I,
-            m_c_Or(m_OneUse(m_Mul(m_Value(X), m_Value(Y))), m_Deferred(X))) &&
-      haveNoCommonBitsSet(Op0, Op1, DL)) {
+  if (match(&I, m_c_DisjointOr(m_OneUse(m_Mul(m_Value(X), m_Value(Y))),
+                               m_Deferred(X)))) {
     Value *IncrementY = Builder.CreateAdd(Y, ConstantInt::get(Ty, 1));
     return BinaryOperator::CreateMul(X, IncrementY);
   }
@@ -3435,7 +3516,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
     return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C));

-  if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+  if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this))
     return DeMorgan;

   // Canonicalize xor to the RHS.
@@ -3581,12 +3662,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   // with binop identity constant. But creating a select with non-constant
   // arm may not be reversible due to poison semantics. Is that a good
   // canonicalization?
-  if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
+  if (match(&I, m_c_Or(m_OneUse(m_SExt(m_Value(A))), m_Value(B))) &&
       A->getType()->isIntOrIntVectorTy(1))
-    return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op1);
-  if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
-      A->getType()->isIntOrIntVectorTy(1))
-    return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op0);
+    return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), B);

   // Note: If we've gotten to the point of visiting the outer OR, then the
   // inner one couldn't be simplified. If it was a constant, then it won't
@@ -3628,6 +3706,26 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
     }
   }

+  {
+    // ((A & B) ^ A) | ((A & B) ^ B) -> A ^ B
+    // (A ^ (A & B)) | (B ^ (A & B)) -> A ^ B
+    // ((A & B) ^ B) | ((A & B) ^ A) -> A ^ B
+    // (B ^ (A & B)) | (A ^ (A & B)) -> A ^ B
+    const auto TryXorOpt = [&](Value *Lhs, Value *Rhs) -> Instruction * {
+      if (match(Lhs, m_c_Xor(m_And(m_Value(A), m_Value(B)), m_Deferred(A))) &&
+          match(Rhs,
+                m_c_Xor(m_And(m_Specific(A), m_Specific(B)), m_Deferred(B)))) {
+        return BinaryOperator::CreateXor(A, B);
+      }
+      return nullptr;
+    };
+
+    if (Instruction *Result = TryXorOpt(Op0, Op1))
+      return Result;
+    if (Instruction *Result = TryXorOpt(Op1, Op0))
+      return Result;
+  }
+
   if (Instruction *V =
           canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
     return V;
@@ -3720,6 +3818,31 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
     return Res;

+  // If we are setting the sign bit of a floating-point value, convert
+  // this to fneg(fabs), then cast back to integer.
+  //
+  // If the result isn't immediately cast back to a float, this will increase
+  // the number of instructions. This is still probably a better canonical form
+  // as it enables FP value tracking.
+  //
+  // Assumes any IEEE-represented type has the sign bit in the high bit.
+  //
+  // This is generous interpretation of noimplicitfloat, this is not a true
+  // floating-point operation.
+  Value *CastOp;
+  if (match(Op0, m_BitCast(m_Value(CastOp))) && match(Op1, m_SignMask()) &&
+      !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
+          Attribute::NoImplicitFloat)) {
+    Type *EltTy = CastOp->getType()->getScalarType();
+    if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
+        EltTy->getPrimitiveSizeInBits() ==
+            I.getType()->getScalarType()->getPrimitiveSizeInBits()) {
+      Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
+      Value *FNegFAbs = Builder.CreateFNeg(FAbs);
+      return new BitCastInst(FNegFAbs, I.getType());
+    }
+  }
+
   return nullptr;
 }

@@ -3931,26 +4054,6 @@ static Instruction *visitMaskedMerge(BinaryOperator &I,
   return nullptr;
 }

-// Transform
-//   ~(x ^ y)
-// into:
-//   (~x) ^ y
-// or into
-//   x ^ (~y)
-static Instruction *sinkNotIntoXor(BinaryOperator &I, Value *X, Value *Y,
-                                   InstCombiner::BuilderTy &Builder) {
-  // We only want to do the transform if it is free to do.
-  if (InstCombiner::isFreeToInvert(X, X->hasOneUse())) {
-    // Ok, good.
-  } else if (InstCombiner::isFreeToInvert(Y, Y->hasOneUse())) {
-    std::swap(X, Y);
-  } else
-    return nullptr;
-
-  Value *NotX = Builder.CreateNot(X, X->getName() + ".not");
-  return BinaryOperator::CreateXor(NotX, Y, I.getName() + ".demorgan");
-}
-
 static Instruction *foldNotXor(BinaryOperator &I,
                                InstCombiner::BuilderTy &Builder) {
   Value *X, *Y;
@@ -3959,9 +4062,6 @@ static Instruction *foldNotXor(BinaryOperator &I,
   if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
     return nullptr;

-  if (Instruction *NewXor = sinkNotIntoXor(I, X, Y, Builder))
-    return NewXor;
-
   auto hasCommonOperand = [](Value *A, Value *B, Value *C, Value *D) {
     return A == C || A == D || B == C || B == D;
   };
@@ -4023,13 +4123,13 @@ static bool canFreelyInvert(InstCombiner &IC, Value *Op,
                             Instruction *IgnoredUser) {
   auto *I = dyn_cast<Instruction>(Op);
   return I && IC.isFreeToInvert(I, /*WillInvertAllUses=*/true) &&
-         InstCombiner::canFreelyInvertAllUsersOf(I, IgnoredUser);
+         IC.canFreelyInvertAllUsersOf(I, IgnoredUser);
 }

 static Value *freelyInvert(InstCombinerImpl &IC, Value *Op,
                            Instruction *IgnoredUser) {
   auto *I = cast<Instruction>(Op);
-  IC.Builder.SetInsertPoint(&*I->getInsertionPointAfterDef());
+  IC.Builder.SetInsertPoint(*I->getInsertionPointAfterDef());
   Value *NotOp = IC.Builder.CreateNot(Op, Op->getName() + ".not");
   Op->replaceUsesWithIf(NotOp,
                         [NotOp](Use &U) { return U.getUser() != NotOp; });
@@ -4067,7 +4167,7 @@ bool InstCombinerImpl::sinkNotIntoLogicalOp(Instruction &I) {
   Op0 = freelyInvert(*this, Op0, &I);
   Op1 = freelyInvert(*this, Op1, &I);

-  Builder.SetInsertPoint(I.getInsertionPointAfterDef());
+  Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
   Value *NewLogicOp;
   if (IsBinaryOp)
     NewLogicOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
@@ -4115,7 +4215,7 @@ bool InstCombinerImpl::sinkNotIntoOtherHandOfLogicalOp(Instruction &I) {

   *OpToInvert = freelyInvert(*this, *OpToInvert, &I);

-  Builder.SetInsertPoint(&*I.getInsertionPointAfterDef());
+  Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
   Value *NewBinOp;
   if (IsBinaryOp)
     NewBinOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
@@ -4259,15 +4359,6 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
   // ~max(~X, Y) --> min(X, ~Y)
   auto *II = dyn_cast<IntrinsicInst>(NotOp);
   if (II && II->hasOneUse()) {
-    if (match(NotOp, m_MaxOrMin(m_Value(X), m_Value(Y))) &&
-        isFreeToInvert(X, X->hasOneUse()) &&
-        isFreeToInvert(Y, Y->hasOneUse())) {
-      Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
-      Value *NotX = Builder.CreateNot(X);
-      Value *NotY = Builder.CreateNot(Y);
-      Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, NotX, NotY);
-      return replaceInstUsesWith(I, InvMaxMin);
-    }
     if (match(NotOp, m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y)))) {
       Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
       Value *NotY = Builder.CreateNot(Y);
@@ -4317,6 +4408,11 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
   if (Instruction *NewXor = foldNotXor(I, Builder))
     return NewXor;

+  // TODO: Could handle multi-use better by checking if all uses of NotOp
+  // (other than I) can be inverted.
+  if (Value *R = getFreelyInverted(NotOp, NotOp->hasOneUse(), &Builder))
+    return replaceInstUsesWith(I, R);
+
   return nullptr;
 }

@@ -4366,7 +4462,7 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
   Value *M;
   if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
                         m_c_And(m_Deferred(M), m_Value()))))
-    return BinaryOperator::CreateOr(Op0, Op1);
+    return BinaryOperator::CreateDisjointOr(Op0, Op1);

   if (Instruction *Xor = visitMaskedMerge(I, Builder))
     return Xor;
@@ -4466,6 +4562,27 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
       // a 'not' op and moving it before the shift. Doing that requires
       // preventing the inverse fold in canShiftBinOpWithConstantRHS().
     }
+
+    // If we are XORing the sign bit of a floating-point value, convert
+    // this to fneg, then cast back to integer.
+    //
+    // This is generous interpretation of noimplicitfloat, this is not a true
+    // floating-point operation.
+    //
+    // Assumes any IEEE-represented type has the sign bit in the high bit.
+    // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt
+    Value *CastOp;
+    if (match(Op0, m_BitCast(m_Value(CastOp))) && match(Op1, m_SignMask()) &&
+        !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
+            Attribute::NoImplicitFloat)) {
+      Type *EltTy = CastOp->getType()->getScalarType();
+      if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
+          EltTy->getPrimitiveSizeInBits() ==
+              I.getType()->getScalarType()->getPrimitiveSizeInBits()) {
+        Value *FNeg = Builder.CreateFNeg(CastOp);
+        return new BitCastInst(FNeg, I.getType());
+      }
+    }
   }

   // FIXME: This should not be limited to scalar (pull into APInt match above).
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d3ec6a7aa667..255ce6973a16 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -89,12 +89,6 @@ static cl::opt<unsigned> GuardWideningWindow(
     cl::desc("How wide an instruction window to bypass looking for "
              "another guard"));

-namespace llvm {
-/// enable preservation of attributes in assume like:
-/// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
-extern cl::opt<bool> EnableKnowledgeRetention;
-} // namespace llvm
-
 /// Return the specified type promoted as it would be to pass though a va_arg
 /// area.
 static Type *getPromotedType(Type *Ty) {
@@ -174,14 +168,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
     return nullptr;

   // Use an integer load+store unless we can find something better.
-  unsigned SrcAddrSp =
-      cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
-  unsigned DstAddrSp =
-      cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
-
   IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
-  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
-  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

   // If the memcpy has metadata describing the members, see if we can get the
   // TBAA tag describing our copy.
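With opaque pointers the bitcasts above become dead weight, but the underlying memcpy fold is unchanged: a small power-of-2 constant-length copy is rewritten into one integer load and one integer store. A C++ analogue of that idea — my sketch; the real transform operates on LLVM IR, not C++:

```cpp
#include <cstdint>
#include <cstring>

// What the fold amounts to for Size == 4: route the copy through a
// single 32-bit integer value (IntegerType::get(Ctx, Size << 3))
// instead of a call to memcpy.
void copy4(void *dst, const void *src) {
  uint32_t tmp;               // the i32 temporary
  std::memcpy(&tmp, src, 4);  // becomes: load i32
  std::memcpy(dst, &tmp, 4);  // becomes: store i32
}
```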
@@ -200,8 +187,8 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
       CopyMD = cast<MDNode>(M->getOperand(2));
   }

-  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
-  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
+  Value *Src = MI->getArgOperand(1);
+  Value *Dest = MI->getArgOperand(0);
   LoadInst *L = Builder.CreateLoad(IntType, Src);
   // Alignment from the mem intrinsic will be better, so use it.
   L->setAlignment(*CopySrcAlign);
@@ -291,9 +278,6 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
     Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

     Value *Dest = MI->getDest();
-    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
-    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
-    Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);

     // Extract the fill value and store.
     const uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
@@ -301,7 +285,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
     StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
     S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
     for (auto *DAI : at::getAssignmentMarkers(S)) {
-      if (any_of(DAI->location_ops(), [&](Value *V) { return V == FillC; }))
+      if (llvm::is_contained(DAI->location_ops(), FillC))
         DAI->replaceVariableLocationOp(FillC, FillVal);
     }

@@ -500,8 +484,6 @@ static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
   if (Result->getType()->getPointerAddressSpace() !=
       II.getType()->getPointerAddressSpace())
     Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
-  if (Result->getType() != II.getType())
-    Result = IC.Builder.CreateBitCast(Result, II.getType());

   return cast<Instruction>(Result);
 }
@@ -532,6 +514,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
     return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
   }

+  Constant *C;
+
   if (IsTZ) {
     // cttz(-x) -> cttz(x)
     if (match(Op0, m_Neg(m_Value(X))))
@@ -567,6 +551,38 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {

     if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
       return IC.replaceOperand(II, 0, X);
+
+    // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
+    if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
+        match(Op1, m_One())) {
+      Value *ConstCttz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+      return BinaryOperator::CreateAdd(ConstCttz, X);
+    }
+
+    // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
+    if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
+        match(Op1, m_One())) {
+      Value *ConstCttz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+      return BinaryOperator::CreateSub(ConstCttz, X);
+    }
+  } else {
+    // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
+    if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
+        match(Op1, m_One())) {
+      Value *ConstCtlz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+      return BinaryOperator::CreateAdd(ConstCtlz, X);
+    }
+
+    // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
+    if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
+        match(Op1, m_One())) {
+      Value *ConstCtlz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+      return BinaryOperator::CreateSub(ConstCtlz, X);
+    }
   }

   KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
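The new cttz fold exploits that shifting a nonzero constant left by x adds exactly x trailing zeros (the zero-is-poison flag of `1` excludes the case where everything is shifted out). A standalone check of that identity — my sketch, requires C++20 for std::countr_zero:

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

// countr_zero(c << x) == countr_zero(c) + x while c << x stays nonzero,
// mirroring cttz(shl(C, X), 1) --> add(cttz(C, 1), X).
int main() {
  const uint32_t c = 0x90; // trailing zeros: 4, highest set bit: 7
  for (uint32_t x = 0; x < 24; ++x)
    assert(std::countr_zero(c << x) == std::countr_zero(c) + int(x));
}
```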
*InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) { Value *FAbsSrc; if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) { - II.setArgOperand(1, ConstantInt::get(Src1->getType(), fabs(Mask))); + II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask))); return replaceOperand(II, 0, FAbsSrc); } - // TODO: is.fpclass(x, fcInf) -> fabs(x) == inf + if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) && + (IsOrdered || IsUnordered) && !IsStrict) { + // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf + // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf + // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf + // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf + Constant *Inf = ConstantFP::getInfinity(Src0->getType()); + FCmpInst::Predicate Pred = + IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ; + if (OrderedInvertedMask == fcInf) + Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE; + + Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0); + Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf); + CmpInf->takeName(&II); + return replaceInstUsesWith(II, CmpInf); + } if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) && (IsOrdered || IsUnordered) && !IsStrict) { @@ -992,8 +1024,7 @@ Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) { return replaceInstUsesWith(II, FCmp); } - KnownFPClass Known = computeKnownFPClass( - Src0, DL, Mask, 0, &getTargetLibraryInfo(), &AC, &II, &DT); + KnownFPClass Known = computeKnownFPClass(Src0, Mask, &II); // Clear test bits we know must be false from the source value. // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other @@ -1030,6 +1061,20 @@ static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI, ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL); } +static std::optional<bool> getKnownSignOrZero(Value *Op, Instruction *CxtI, + const DataLayout &DL, + AssumptionCache *AC, + DominatorTree *DT) { + if (std::optional<bool> Sign = getKnownSign(Op, CxtI, DL, AC, DT)) + return Sign; + + Value *X, *Y; + if (match(Op, m_NSWSub(m_Value(X), m_Value(Y)))) + return isImpliedByDomCondition(ICmpInst::ICMP_SLE, X, Y, CxtI, DL); + + return std::nullopt; +} + /// Return true if two values \p Op0 and \p Op1 are known to have the same sign. 
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, Instruction *CxtI,
                                 const DataLayout &DL, AssumptionCache *AC,
@@ -1530,12 +1575,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
       return replaceOperand(*II, 0, X);
-    if (std::optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
-      // abs(x) -> x if x >= 0
-      if (!*Sign)
+    if (std::optional<bool> Known =
+            getKnownSignOrZero(IIOperand, II, DL, &AC, &DT)) {
+      // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
+      // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
+      if (!*Known)
         return replaceInstUsesWith(*II, IIOperand);
       // abs(x) -> -x if x < 0
+      // abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
       if (IntMinIsPoison)
         return BinaryOperator::CreateNSWNeg(IIOperand);
       return BinaryOperator::CreateNeg(IIOperand);
@@ -1580,8 +1628,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     Constant *C;
     if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
         I0->hasOneUse()) {
-      Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
-      if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) {
+      if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType())) {
         Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
         return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
       }
@@ -1603,13 +1650,26 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     Constant *C;
     if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
         I0->hasOneUse()) {
-      Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
-      if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) {
+      if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType())) {
         Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
         return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
       }
     }
+    // umin(i1 X, i1 Y) -> and i1 X, Y
+    // smax(i1 X, i1 Y) -> and i1 X, Y
+    if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
+        II->getType()->isIntOrIntVectorTy(1)) {
+      return BinaryOperator::CreateAnd(I0, I1);
+    }
+
+    // umax(i1 X, i1 Y) -> or i1 X, Y
+    // smin(i1 X, i1 Y) -> or i1 X, Y
+    if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
+        II->getType()->isIntOrIntVectorTy(1)) {
+      return BinaryOperator::CreateOr(I0, I1);
+    }
+
     if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
       // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
       // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
@@ -1672,12 +1732,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
     Value *A;
     if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
-        !isFreeToInvert(A, A->hasOneUse()) &&
-        isFreeToInvert(Y, Y->hasOneUse())) {
-      Value *NotY = Builder.CreateNot(Y);
-      Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
-      Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
-      return BinaryOperator::CreateNot(InvMaxMin);
+        !isFreeToInvert(A, A->hasOneUse())) {
+      if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
+        Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
+        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
+        return BinaryOperator::CreateNot(InvMaxMin);
+      }
     }
     return nullptr;
   };
@@ -1929,6 +1989,52 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       return &CI;
     break;
   }
+  case Intrinsic::ptrmask: {
+    unsigned BitWidth =
DL.getPointerTypeSizeInBits(II->getType()); + KnownBits Known(BitWidth); + if (SimplifyDemandedInstructionBits(*II, Known)) + return II; + + Value *InnerPtr, *InnerMask; + bool Changed = false; + // Combine: + // (ptrmask (ptrmask p, A), B) + // -> (ptrmask p, (and A, B)) + if (match(II->getArgOperand(0), + m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr), + m_Value(InnerMask))))) { + assert(II->getArgOperand(1)->getType() == InnerMask->getType() && + "Mask types must match"); + // TODO: If InnerMask == Op1, we could copy attributes from inner + // callsite -> outer callsite. + Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask); + replaceOperand(CI, 0, InnerPtr); + replaceOperand(CI, 1, NewMask); + Changed = true; + } + + // See if we can deduce non-null. + if (!CI.hasRetAttr(Attribute::NonNull) && + (Known.isNonZero() || + isKnownNonZero(II, DL, /*Depth*/ 0, &AC, II, &DT))) { + CI.addRetAttr(Attribute::NonNull); + Changed = true; + } + + unsigned NewAlignmentLog = + std::min(Value::MaxAlignmentExponent, + std::min(BitWidth - 1, Known.countMinTrailingZeros())); + // Known bits will capture if we had alignment information associated with + // the pointer argument. + if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) { + CI.addRetAttr(Attribute::getWithAlignment( + CI.getContext(), Align(uint64_t(1) << NewAlignmentLog))); + Changed = true; + } + if (Changed) + return &CI; + break; + } case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: { if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) @@ -2493,10 +2599,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { VectorType *NewVT = cast<VectorType>(II->getType()); if (Constant *CV0 = dyn_cast<Constant>(Arg0)) { if (Constant *CV1 = dyn_cast<Constant>(Arg1)) { - CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext); - CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext); - - return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1)); + Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext); + Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext); + return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1)); } // Couldn't simplify - canonicalize constant to the RHS. @@ -2950,24 +3055,27 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return replaceOperand(CI, 0, InsertTuple); } - auto *DstTy = dyn_cast<FixedVectorType>(ReturnType); - auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType()); + auto *DstTy = dyn_cast<VectorType>(ReturnType); + auto *VecTy = dyn_cast<VectorType>(Vec->getType()); - // Only canonicalize if the the destination vector and Vec are fixed - // vectors. if (DstTy && VecTy) { - unsigned DstNumElts = DstTy->getNumElements(); - unsigned VecNumElts = VecTy->getNumElements(); + auto DstEltCnt = DstTy->getElementCount(); + auto VecEltCnt = VecTy->getElementCount(); unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue(); // Extracting the entirety of Vec is a nop. - if (VecNumElts == DstNumElts) { + if (DstEltCnt == VecTy->getElementCount()) { replaceInstUsesWith(CI, Vec); return eraseInstFromFunction(CI); } + // Only canonicalize to shufflevector if the destination vector and + // Vec are fixed vectors. 
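      // Illustrative sketch (assumed IR, not part of this patch): extracting
      // a low prefix of a fixed vector becomes a prefix shuffle, e.g.
      //   %r = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %v, i64 0)
      // -> %r = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
      // A scalable source or destination has no constant-length shuffle mask,
      // so we bail out below.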
+ if (VecEltCnt.isScalable() || DstEltCnt.isScalable()) + break; + SmallVector<int, 8> Mask; - for (unsigned i = 0; i != DstNumElts; ++i) + for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i) Mask.push_back(IdxN + i); Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask); @@ -3943,9 +4051,9 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy); NC->setDebugLoc(Caller->getDebugLoc()); - Instruction *InsertPt = NewCall->getInsertionPointAfterDef(); - assert(InsertPt && "No place to insert cast"); - InsertNewInstBefore(NC, *InsertPt); + auto OptInsertPt = NewCall->getInsertionPointAfterDef(); + assert(OptInsertPt && "No place to insert cast"); + InsertNewInstBefore(NC, *OptInsertPt); Worklist.pushUsersToWorkList(*Caller); } else { NV = PoisonValue::get(Caller->getType()); @@ -3972,8 +4080,6 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { Instruction * InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, IntrinsicInst &Tramp) { - Value *Callee = Call.getCalledOperand(); - Type *CalleeTy = Callee->getType(); FunctionType *FTy = Call.getFunctionType(); AttributeList Attrs = Call.getAttributes(); @@ -4070,12 +4176,8 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, // Replace the trampoline call with a direct call. Let the generic // code sort out any function type mismatches. - FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, - FTy->isVarArg()); - Constant *NewCallee = - NestF->getType() == PointerType::getUnqual(NewFTy) ? - NestF : ConstantExpr::getBitCast(NestF, - PointerType::getUnqual(NewFTy)); + FunctionType *NewFTy = + FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg()); AttributeList NewPAL = AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(), Attrs.getRetAttrs(), NewArgAttrs); @@ -4085,19 +4187,18 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) { - NewCaller = InvokeInst::Create(NewFTy, NewCallee, - II->getNormalDest(), II->getUnwindDest(), - NewArgs, OpBundles); + NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(), + II->getUnwindDest(), NewArgs, OpBundles); cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) { NewCaller = - CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(), + CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(), CBI->getIndirectDests(), NewArgs, OpBundles); cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv()); cast<CallBrInst>(NewCaller)->setAttributes(NewPAL); } else { - NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles); + NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles); cast<CallInst>(NewCaller)->setTailCallKind( cast<CallInst>(Call).getTailCallKind()); cast<CallInst>(NewCaller)->setCallingConv( @@ -4113,7 +4214,6 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, // Replace the trampoline call with a direct call. Since there is no 'nest' // parameter, there is no need to adjust the argument list. Let the generic // code sort out any function type mismatches. 
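  // Illustrative sketch (assumed IR, not part of this patch): when the callee
  // takes no 'nest' parameter, a call through the adjusted trampoline
  //   %p = call ptr @llvm.adjust.trampoline(ptr %tramp) ; %tramp set up for @f
  //   call void %p(i32 %x)
  // simply becomes a direct call:
  //   call void @f(i32 %x)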
- Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy); - Call.setCalledFunction(FTy, NewCallee); + Call.setCalledFunction(FTy, NestF); return &Call; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 5c84f666616d..6629ca840a67 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -29,11 +29,8 @@ using namespace PatternMatch; /// true for, actually insert the code to evaluate the expression. Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { - if (Constant *C = dyn_cast<Constant>(V)) { - C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); - // If we got a constantexpr back, try to simplify it with DL info. - return ConstantFoldConstant(C, DL, &TLI); - } + if (Constant *C = dyn_cast<Constant>(V)) + return ConstantFoldIntegerCast(C, Ty, isSigned, DL); // Otherwise, it must be an instruction. Instruction *I = cast<Instruction>(V); @@ -112,7 +109,7 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty, } Res->takeName(I); - return InsertNewInstWith(Res, *I); + return InsertNewInstWith(Res, I->getIterator()); } Instruction::CastOps @@ -217,7 +214,8 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) { /// free to be evaluated in that type. This is a helper for canEvaluate*. static bool canAlwaysEvaluateInType(Value *V, Type *Ty) { if (isa<Constant>(V)) - return true; + return match(V, m_ImmConstant()); + Value *X; if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) && X->getType() == Ty) @@ -229,7 +227,6 @@ static bool canAlwaysEvaluateInType(Value *V, Type *Ty) { /// Filter out values that we can not evaluate in the destination type for free. /// This is a helper for canEvaluate*. static bool canNotEvaluateInType(Value *V, Type *Ty) { - assert(!isa<Constant>(V) && "Constant should already be handled."); if (!isa<Instruction>(V)) return true; // We don't extend or shrink something that has multiple uses -- doing so @@ -505,11 +502,13 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) { if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc)) return nullptr; - // We have an unnecessarily wide rotate! - // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt)) - // Narrow the inputs and convert to funnel shift intrinsic: - // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt)) - Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy); + // Adjust the width of ShAmt for narrowed funnel shift operation: + // - Zero-extend if ShAmt is narrower than the destination type. + // - Truncate if ShAmt is wider, discarding non-significant high-order bits. + // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal), + // zext/trunc(ShAmt)). + Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy); + Value *X, *Y; X = Y = Builder.CreateTrunc(ShVal0, DestTy); if (ShVal0 != ShVal1) @@ -582,13 +581,15 @@ Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) { APInt(SrcWidth, MaxShiftAmt)))) { auto *OldShift = cast<Instruction>(Trunc.getOperand(0)); bool IsExact = OldShift->isExact(); - auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true); - ShAmt = Constant::mergeUndefsWith(ShAmt, C); - Value *Shift = - OldShift->getOpcode() == Instruction::AShr - ? 
Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact) - : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact); - return CastInst::CreateTruncOrBitCast(Shift, DestTy); + if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(), + /*IsSigned*/ true, DL)) { + ShAmt = Constant::mergeUndefsWith(ShAmt, C); + Value *Shift = + OldShift->getOpcode() == Instruction::AShr + ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact) + : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact); + return CastInst::CreateTruncOrBitCast(Shift, DestTy); + } } } break; @@ -904,19 +905,18 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. // zext (X != 0) to i32 --> X iff X has only the low bit set. // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. - if (Op1CV->isZero() && Cmp->isEquality() && - (Cmp->getOperand(0)->getType() == Zext.getType() || - Cmp->getPredicate() == ICmpInst::ICMP_NE)) { - // If Op1C some other power of two, convert: - KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext); + if (Op1CV->isZero() && Cmp->isEquality()) { // Exactly 1 possible 1? But not the high-bit because that is // canonicalized to this form. + KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext); APInt KnownZeroMask(~Known.Zero); - if (KnownZeroMask.isPowerOf2() && - (Zext.getType()->getScalarSizeInBits() != - KnownZeroMask.logBase2() + 1)) { - uint32_t ShAmt = KnownZeroMask.logBase2(); + uint32_t ShAmt = KnownZeroMask.logBase2(); + bool IsExpectShAmt = KnownZeroMask.isPowerOf2() && + (Zext.getType()->getScalarSizeInBits() != ShAmt + 1); + if (IsExpectShAmt && + (Cmp->getOperand(0)->getType() == Zext.getType() || + Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) { Value *In = Cmp->getOperand(0); if (ShAmt) { // Perform a logical shr by shiftamt. @@ -1184,14 +1184,14 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) { Value *X; if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) && X->getType() == DestTy) - return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, DestTy)); + return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy)); // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)). Value *And; if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) && match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) && X->getType() == DestTy) { - Constant *ZC = ConstantExpr::getZExt(C, DestTy); + Value *ZC = Builder.CreateZExt(C, DestTy); return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC); } @@ -1202,7 +1202,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) { // zext (and (trunc X), C) --> and X, (zext C) if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) && X->getType() == DestTy) { - Constant *ZextC = ConstantExpr::getZExt(C, DestTy); + Value *ZextC = Builder.CreateZExt(C, DestTy); return BinaryOperator::CreateAnd(X, ZextC); } @@ -1221,6 +1221,22 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) { } } + if (!Zext.hasNonNeg()) { + // If this zero extend is only used by a shift, add nneg flag. 
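    // Illustrative sketch (assumed IR, not part of this patch):
    //   %amt = zext i8 %n to i32
    //   %s   = shl i32 %x, %amt
    // A shift amount of 32 or more would make the shl poison, so %n is
    // effectively < 32 and its sign bit must be clear; the zext can be nneg.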
+ if (Zext.hasOneUse() && + SrcTy->getScalarSizeInBits() > + Log2_64_Ceil(DestTy->getScalarSizeInBits()) && + match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) { + Zext.setNonNeg(); + return &Zext; + } + + if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) { + Zext.setNonNeg(); + return &Zext; + } + } + return nullptr; } @@ -1373,8 +1389,11 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) { unsigned DestBitSize = DestTy->getScalarSizeInBits(); // If the value being extended is zero or positive, use a zext instead. - if (isKnownNonNegative(Src, DL, 0, &AC, &Sext, &DT)) - return CastInst::Create(Instruction::ZExt, Src, DestTy); + if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) { + auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy); + CI->setNonNeg(true); + return CI; + } // Try to extend the entire expression tree to the wide destination type. if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) { @@ -1445,9 +1464,11 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) { // TODO: Eventually this could be subsumed by EvaluateInDifferentType. Constant *BA = nullptr, *CA = nullptr; if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)), - m_Constant(CA))) && + m_ImmConstant(CA))) && BA->isElementWiseEqual(CA) && A->getType() == DestTy) { - Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy); + Constant *WideCurrShAmt = + ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL); + assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail"); Constant *NumLowbitsLeft = ConstantExpr::getSub( ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt); Constant *NewShAmt = ConstantExpr::getSub( @@ -1915,29 +1936,6 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) { return nullptr; } -/// Implement the transforms for cast of pointer (bitcast/ptrtoint) -Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) { - Value *Src = CI.getOperand(0); - - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { - // If casting the result of a getelementptr instruction with no offset, turn - // this into a cast of the original pointer! - if (GEP->hasAllZeroIndices() && - // If CI is an addrspacecast and GEP changes the poiner type, merging - // GEP into CI would undo canonicalizing addrspacecast with different - // pointer types, causing infinite loops. - (!isa<AddrSpaceCastInst>(CI) || - GEP->getType() == GEP->getPointerOperandType())) { - // Changing the cast operand is usually not a good idea but it is safe - // here because the pointer operand is being replaced with another - // pointer operand so the opcode doesn't need to change. - return replaceOperand(CI, 0, GEP->getOperand(0)); - } - } - - return commonCastTransforms(CI); -} - Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast @@ -1955,6 +1953,15 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) { return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } + // (ptrtoint (ptrmask P, M)) + // -> (and (ptrtoint P), M) + // This is generally beneficial as `and` is better supported than `ptrmask`. 
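  // Illustrative sketch (assumed IR, not part of this patch):
  //   %mp = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
  //   %i  = ptrtoint ptr %mp to i64
  // ->
  //   %pi = ptrtoint ptr %p to i64
  //   %i  = and i64 %pi, -16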
+ Value *Ptr, *Mask; + if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr), + m_Value(Mask)))) && + Mask->getType() == Ty) + return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask); + if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) { // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use. // While this can increase the number of instructions it doesn't actually @@ -1979,7 +1986,7 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) { return InsertElementInst::Create(Vec, NewCast, Index); } - return commonPointerCastTransforms(CI); + return commonCastTransforms(CI); } /// This input value (which is known to have vector type) is being zero extended @@ -2136,9 +2143,12 @@ static bool collectInsertionElements(Value *V, unsigned Shift, Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); for (unsigned i = 0; i != NumElts; ++i) { - unsigned ShiftI = Shift+i*ElementSize; - Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), - ShiftI)); + unsigned ShiftI = Shift + i * ElementSize; + Constant *Piece = ConstantFoldBinaryInstruction( + Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI)); + if (!Piece) + return false; + Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy, isBigEndian)) @@ -2701,11 +2711,9 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) { if (Instruction *I = foldBitCastSelect(CI, Builder)) return I; - if (SrcTy->isPointerTy()) - return commonPointerCastTransforms(CI); return commonCastTransforms(CI); } Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) { - return commonPointerCastTransforms(CI); + return commonCastTransforms(CI); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 656f04370e17..e42e011bd436 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -12,12 +12,14 @@ #include "InstCombineInternal.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/Utils/Local.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" @@ -26,6 +28,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" +#include <bitset> using namespace llvm; using namespace PatternMatch; @@ -412,7 +415,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( /// Returns true if we can rewrite Start as a GEP with pointer Base /// and some integer offset. The nodes that need to be re-written /// for this transformation will be added to Explored. 
-static bool canRewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base, +static bool canRewriteGEPAsOffset(Value *Start, Value *Base, const DataLayout &DL, SetVector<Value *> &Explored) { SmallVector<Value *, 16> WorkList(1, Start); @@ -440,27 +443,15 @@ static bool canRewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base, continue; } - if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) && - !isa<GetElementPtrInst>(V) && !isa<PHINode>(V)) + if (!isa<GetElementPtrInst>(V) && !isa<PHINode>(V)) // We've found some value that we can't explore which is different from // the base. Therefore we can't do this transformation. return false; - if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) { - auto *CI = cast<CastInst>(V); - if (!CI->isNoopCast(DL)) - return false; - - if (!Explored.contains(CI->getOperand(0))) - WorkList.push_back(CI->getOperand(0)); - } - if (auto *GEP = dyn_cast<GEPOperator>(V)) { - // We're limiting the GEP to having one index. This will preserve - // the original pointer type. We could handle more cases in the - // future. - if (GEP->getNumIndices() != 1 || !GEP->isInBounds() || - GEP->getSourceElementType() != ElemTy) + // Only allow inbounds GEPs with at most one variable offset. + auto IsNonConst = [](Value *V) { return !isa<ConstantInt>(V); }; + if (!GEP->isInBounds() || count_if(GEP->indices(), IsNonConst) > 1) return false; if (!Explored.contains(GEP->getOperand(0))) @@ -514,7 +505,8 @@ static bool canRewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base, static void setInsertionPoint(IRBuilder<> &Builder, Value *V, bool Before = true) { if (auto *PHI = dyn_cast<PHINode>(V)) { - Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt()); + BasicBlock *Parent = PHI->getParent(); + Builder.SetInsertPoint(Parent, Parent->getFirstInsertionPt()); return; } if (auto *I = dyn_cast<Instruction>(V)) { @@ -526,7 +518,7 @@ static void setInsertionPoint(IRBuilder<> &Builder, Value *V, if (auto *A = dyn_cast<Argument>(V)) { // Set the insertion point in the entry block. BasicBlock &Entry = A->getParent()->getEntryBlock(); - Builder.SetInsertPoint(&*Entry.getFirstInsertionPt()); + Builder.SetInsertPoint(&Entry, Entry.getFirstInsertionPt()); return; } // Otherwise, this is a constant and we don't need to set a new @@ -536,7 +528,7 @@ static void setInsertionPoint(IRBuilder<> &Builder, Value *V, /// Returns a re-written value of Start as an indexed GEP using Base as a /// pointer. -static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base, +static Value *rewriteGEPAsOffset(Value *Start, Value *Base, const DataLayout &DL, SetVector<Value *> &Explored, InstCombiner &IC) { @@ -567,36 +559,18 @@ static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base, // Create all the other instructions. for (Value *Val : Explored) { - if (NewInsts.contains(Val)) continue; - if (auto *CI = dyn_cast<CastInst>(Val)) { - // Don't get rid of the intermediate variable here; the store can grow - // the map which will invalidate the reference to the input value. - Value *V = NewInsts[CI->getOperand(0)]; - NewInsts[CI] = V; - continue; - } if (auto *GEP = dyn_cast<GEPOperator>(Val)) { - Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)] - : GEP->getOperand(1); setInsertionPoint(Builder, GEP); - // Indices might need to be sign extended. GEPs will magically do - // this, but we need to do it ourselves here. 
- if (Index->getType()->getScalarSizeInBits() != - NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) { - Index = Builder.CreateSExtOrTrunc( - Index, NewInsts[GEP->getOperand(0)]->getType(), - GEP->getOperand(0)->getName() + ".sext"); - } - - auto *Op = NewInsts[GEP->getOperand(0)]; + Value *Op = NewInsts[GEP->getOperand(0)]; + Value *OffsetV = emitGEPOffset(&Builder, DL, GEP); if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero()) - NewInsts[GEP] = Index; + NewInsts[GEP] = OffsetV; else NewInsts[GEP] = Builder.CreateNSWAdd( - Op, Index, GEP->getOperand(0)->getName() + ".add"); + Op, OffsetV, GEP->getOperand(0)->getName() + ".add"); continue; } if (isa<PHINode>(Val)) @@ -624,23 +598,14 @@ static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base, } } - PointerType *PtrTy = - ElemTy->getPointerTo(Start->getType()->getPointerAddressSpace()); for (Value *Val : Explored) { if (Val == Base) continue; - // Depending on the type, for external users we have to emit - // a GEP or a GEP + ptrtoint. setInsertionPoint(Builder, Val, false); - - // Cast base to the expected type. - Value *NewVal = Builder.CreateBitOrPointerCast( - Base, PtrTy, Start->getName() + "to.ptr"); - NewVal = Builder.CreateInBoundsGEP(ElemTy, NewVal, ArrayRef(NewInsts[Val]), - Val->getName() + ".ptr"); - NewVal = Builder.CreateBitOrPointerCast( - NewVal, Val->getType(), Val->getName() + ".conv"); + // Create GEP for external users. + Value *NewVal = Builder.CreateInBoundsGEP( + Builder.getInt8Ty(), Base, NewInsts[Val], Val->getName() + ".ptr"); IC.replaceInstUsesWith(*cast<Instruction>(Val), NewVal); // Add old instruction to worklist for DCE. We don't directly remove it // here because the original compare is one of the users. @@ -650,48 +615,6 @@ static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base, return NewInsts[Start]; } -/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express -/// the input Value as a constant indexed GEP. Returns a pair containing -/// the GEPs Pointer and Index. -static std::pair<Value *, Value *> -getAsConstantIndexedAddress(Type *ElemTy, Value *V, const DataLayout &DL) { - Type *IndexType = IntegerType::get(V->getContext(), - DL.getIndexTypeSizeInBits(V->getType())); - - Constant *Index = ConstantInt::getNullValue(IndexType); - while (true) { - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - // We accept only inbouds GEPs here to exclude the possibility of - // overflow. - if (!GEP->isInBounds()) - break; - if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 && - GEP->getSourceElementType() == ElemTy) { - V = GEP->getOperand(0); - Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1)); - Index = ConstantExpr::getAdd( - Index, ConstantExpr::getSExtOrTrunc(GEPIndex, IndexType)); - continue; - } - break; - } - if (auto *CI = dyn_cast<IntToPtrInst>(V)) { - if (!CI->isNoopCast(DL)) - break; - V = CI->getOperand(0); - continue; - } - if (auto *CI = dyn_cast<PtrToIntInst>(V)) { - if (!CI->isNoopCast(DL)) - break; - V = CI->getOperand(0); - continue; - } - break; - } - return {V, Index}; -} - /// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant. /// We can look through PHIs, GEPs and casts in order to determine a common base /// between GEPLHS and RHS. 
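// Illustrative sketch (assumed IR, not part of this patch): given a common
// base pointer,
//   %g = getelementptr inbounds i8, ptr %base, i64 8
//   %c = icmp ult ptr %g, %rhs   ; %rhs reachable from %base via GEPs/PHIs
// both sides can be rewritten as integer byte offsets from %base and the
// pointers compared as signed offsets instead, roughly
//   %c = icmp slt i64 8, %rhs.off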
@@ -706,14 +629,19 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, if (!GEPLHS->hasAllConstantIndices()) return nullptr; - Type *ElemTy = GEPLHS->getSourceElementType(); - Value *PtrBase, *Index; - std::tie(PtrBase, Index) = getAsConstantIndexedAddress(ElemTy, GEPLHS, DL); + APInt Offset(DL.getIndexTypeSizeInBits(GEPLHS->getType()), 0); + Value *PtrBase = + GEPLHS->stripAndAccumulateConstantOffsets(DL, Offset, + /*AllowNonInbounds*/ false); + + // Bail if we looked through addrspacecast. + if (PtrBase->getType() != GEPLHS->getType()) + return nullptr; // The set of nodes that will take part in this transformation. SetVector<Value *> Nodes; - if (!canRewriteGEPAsOffset(ElemTy, RHS, PtrBase, DL, Nodes)) + if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes)) return nullptr; // We know we can re-write this as @@ -722,13 +650,14 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, // can't have overflow on either side. We can therefore re-write // this as: // OFFSET1 cmp OFFSET2 - Value *NewRHS = rewriteGEPAsOffset(ElemTy, RHS, PtrBase, DL, Nodes, IC); + Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes, IC); // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written // GEP having PtrBase as the pointer base, and has returned in NewRHS the // offset. Since Index is the offset of LHS to the base pointer, we will now // compare the offsets instead of comparing the pointers. - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS); + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), + IC.Builder.getInt(Offset), NewRHS); } /// Fold comparisons between a GEP instruction and something else. At this point @@ -844,17 +773,6 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this); } - // If one of the GEPs has all zero indices, recurse. - // FIXME: Handle vector of pointers. - if (!GEPLHS->getType()->isVectorTy() && GEPLHS->hasAllZeroIndices()) - return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0), - ICmpInst::getSwappedPredicate(Cond), I); - - // If the other GEP has all zero indices, recurse. - // FIXME: Handle vector of pointers. - if (!GEPRHS->getType()->isVectorTy() && GEPRHS->hasAllZeroIndices()) - return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); - bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() && GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType()) { @@ -894,8 +812,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // Only lower this if the icmp is the only user of the GEP or if we expect // the result to fold to a constant! 
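    // Illustrative sketch (assumed IR, not part of this patch):
    //   %a = getelementptr inbounds i32, ptr %p, i64 %i
    //   %b = getelementptr inbounds i32, ptr %p, i64 %j
    //   %c = icmp ult ptr %a, %b
    // lowers to a compare of the emitted byte offsets, roughly
    //   %c = icmp slt i64 (mul i64 %i, 4), (mul i64 %j, 4)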
if ((GEPsInBounds || CmpInst::isEquality(Cond)) && - (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && - (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { + (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && + (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse())) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) Value *L = EmitGEPOffset(GEPLHS); Value *R = EmitGEPOffset(GEPRHS); @@ -1285,9 +1203,9 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) { if (Pred == ICmpInst::ICMP_SGT) { Value *A, *B; if (match(Cmp.getOperand(0), m_SMin(m_Value(A), m_Value(B)))) { - if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) + if (isKnownPositive(A, SQ.getWithInstruction(&Cmp))) return new ICmpInst(Pred, B, Cmp.getOperand(1)); - if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT)) + if (isKnownPositive(B, SQ.getWithInstruction(&Cmp))) return new ICmpInst(Pred, A, Cmp.getOperand(1)); } } @@ -1554,6 +1472,61 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp, return nullptr; } +/// Fold icmp (trunc X), (trunc Y). +/// Fold icmp (trunc X), (zext Y). +Instruction * +InstCombinerImpl::foldICmpTruncWithTruncOrExt(ICmpInst &Cmp, + const SimplifyQuery &Q) { + if (Cmp.isSigned()) + return nullptr; + + Value *X, *Y; + ICmpInst::Predicate Pred; + bool YIsZext = false; + // Try to match icmp (trunc X), (trunc Y) + if (match(&Cmp, m_ICmp(Pred, m_Trunc(m_Value(X)), m_Trunc(m_Value(Y))))) { + if (X->getType() != Y->getType() && + (!Cmp.getOperand(0)->hasOneUse() || !Cmp.getOperand(1)->hasOneUse())) + return nullptr; + if (!isDesirableIntType(X->getType()->getScalarSizeInBits()) && + isDesirableIntType(Y->getType()->getScalarSizeInBits())) { + std::swap(X, Y); + Pred = Cmp.getSwappedPredicate(Pred); + } + } + // Try to match icmp (trunc X), (zext Y) + else if (match(&Cmp, m_c_ICmp(Pred, m_Trunc(m_Value(X)), + m_OneUse(m_ZExt(m_Value(Y)))))) + + YIsZext = true; + else + return nullptr; + + Type *TruncTy = Cmp.getOperand(0)->getType(); + unsigned TruncBits = TruncTy->getScalarSizeInBits(); + + // If this transform will end up changing from desirable types -> undesirable + // types skip it. + if (isDesirableIntType(TruncBits) && + !isDesirableIntType(X->getType()->getScalarSizeInBits())) + return nullptr; + + // Check if the trunc is unneeded. + KnownBits KnownX = llvm::computeKnownBits(X, /*Depth*/ 0, Q); + if (KnownX.countMaxActiveBits() > TruncBits) + return nullptr; + + if (!YIsZext) { + // If Y is also a trunc, make sure it is unneeded. + KnownBits KnownY = llvm::computeKnownBits(Y, /*Depth*/ 0, Q); + if (KnownY.countMaxActiveBits() > TruncBits) + return nullptr; + } + + Value *NewY = Builder.CreateZExtOrTrunc(Y, X->getType()); + return new ICmpInst(Pred, X, NewY); +} + /// Fold icmp (xor X, Y), C. Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp, BinaryOperator *Xor, @@ -1944,19 +1917,18 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp, return nullptr; } -/// Fold icmp eq/ne (or (xor (X1, X2), xor(X3, X4))), 0. -static Value *foldICmpOrXorChain(ICmpInst &Cmp, BinaryOperator *Or, - InstCombiner::BuilderTy &Builder) { - // Are we using xors to bitwise check for a pair or pairs of (in)equalities? - // Convert to a shorter form that has more potential to be folded even - // further. 
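// Illustrative sketch (assumed IR, not part of this patch):
//   %x = xor i32 %a, %b
//   %s = sub i32 %c, %d
//   %o = or i32 %x, %s
//   %r = icmp eq i32 %o, 0
// folds to
//   %e1 = icmp eq i32 %a, %b
//   %e2 = icmp eq i32 %c, %d
//   %r  = and i1 %e1, %e2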
- // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4) - // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4) - // ((X1 ^ X2) || (X3 ^ X4) || (X5 ^ X6)) == 0 --> +/// Fold icmp eq/ne (or (xor/sub (X1, X2), xor/sub (X3, X4))), 0. +static Value *foldICmpOrXorSubChain(ICmpInst &Cmp, BinaryOperator *Or, + InstCombiner::BuilderTy &Builder) { + // Are we using xors or subs to bitwise check for a pair or pairs of + // (in)equalities? Convert to a shorter form that has more potential to be + // folded even further. + // ((X1 ^/- X2) || (X3 ^/- X4)) == 0 --> (X1 == X2) && (X3 == X4) + // ((X1 ^/- X2) || (X3 ^/- X4)) != 0 --> (X1 != X2) || (X3 != X4) + // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) == 0 --> // (X1 == X2) && (X3 == X4) && (X5 == X6) - // ((X1 ^ X2) || (X3 ^ X4) || (X5 ^ X6)) != 0 --> + // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) != 0 --> // (X1 != X2) || (X3 != X4) || (X5 != X6) - // TODO: Implement for sub SmallVector<std::pair<Value *, Value *>, 2> CmpValues; SmallVector<Value *, 16> WorkList(1, Or); @@ -1967,9 +1939,16 @@ static Value *foldICmpOrXorChain(ICmpInst &Cmp, BinaryOperator *Or, if (match(OrOperatorArgument, m_OneUse(m_Xor(m_Value(Lhs), m_Value(Rhs))))) { CmpValues.emplace_back(Lhs, Rhs); - } else { - WorkList.push_back(OrOperatorArgument); + return; } + + if (match(OrOperatorArgument, + m_OneUse(m_Sub(m_Value(Lhs), m_Value(Rhs))))) { + CmpValues.emplace_back(Lhs, Rhs); + return; + } + + WorkList.push_back(OrOperatorArgument); }; Value *CurrentValue = WorkList.pop_back_val(); @@ -2082,7 +2061,7 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp, return BinaryOperator::Create(BOpc, CmpP, CmpQ); } - if (Value *V = foldICmpOrXorChain(Cmp, Or, Builder)) + if (Value *V = foldICmpOrXorSubChain(Cmp, Or, Builder)) return replaceInstUsesWith(Cmp, V); return nullptr; @@ -2443,7 +2422,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, // constant-value-based preconditions in the folds below, then we could assert // those conditions rather than checking them. This is difficult because of // undef/poison (PR34838). - if (IsAShr) { + if (IsAShr && Shr->hasOneUse()) { if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) { // When ShAmtC can be shifted losslessly: // icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC) @@ -2483,7 +2462,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, ConstantInt::getAllOnesValue(ShrTy)); } } - } else { + } else if (!IsAShr) { if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) { // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC) // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC) @@ -2888,19 +2867,97 @@ Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp, return new ICmpInst(SwappedPred, Add, ConstantInt::get(Ty, ~C)); } +static Value *createLogicFromTable(const std::bitset<4> &Table, Value *Op0, + Value *Op1, IRBuilderBase &Builder, + bool HasOneUse) { + auto FoldConstant = [&](bool Val) { + Constant *Res = Val ? Builder.getTrue() : Builder.getFalse(); + if (Op0->getType()->isVectorTy()) + Res = ConstantVector::getSplat( + cast<VectorType>(Op0->getType())->getElementCount(), Res); + return Res; + }; + + switch (Table.to_ulong()) { + case 0: // 0 0 0 0 + return FoldConstant(false); + case 1: // 0 0 0 1 + return HasOneUse ? Builder.CreateNot(Builder.CreateOr(Op0, Op1)) : nullptr; + case 2: // 0 0 1 0 + return HasOneUse ? 
Builder.CreateAnd(Builder.CreateNot(Op0), Op1) : nullptr; + case 3: // 0 0 1 1 + return Builder.CreateNot(Op0); + case 4: // 0 1 0 0 + return HasOneUse ? Builder.CreateAnd(Op0, Builder.CreateNot(Op1)) : nullptr; + case 5: // 0 1 0 1 + return Builder.CreateNot(Op1); + case 6: // 0 1 1 0 + return Builder.CreateXor(Op0, Op1); + case 7: // 0 1 1 1 + return HasOneUse ? Builder.CreateNot(Builder.CreateAnd(Op0, Op1)) : nullptr; + case 8: // 1 0 0 0 + return Builder.CreateAnd(Op0, Op1); + case 9: // 1 0 0 1 + return HasOneUse ? Builder.CreateNot(Builder.CreateXor(Op0, Op1)) : nullptr; + case 10: // 1 0 1 0 + return Op1; + case 11: // 1 0 1 1 + return HasOneUse ? Builder.CreateOr(Builder.CreateNot(Op0), Op1) : nullptr; + case 12: // 1 1 0 0 + return Op0; + case 13: // 1 1 0 1 + return HasOneUse ? Builder.CreateOr(Op0, Builder.CreateNot(Op1)) : nullptr; + case 14: // 1 1 1 0 + return Builder.CreateOr(Op0, Op1); + case 15: // 1 1 1 1 + return FoldConstant(true); + default: + llvm_unreachable("Invalid Operation"); + } + return nullptr; +} + /// Fold icmp (add X, Y), C. Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp, BinaryOperator *Add, const APInt &C) { Value *Y = Add->getOperand(1); + Value *X = Add->getOperand(0); + + Value *Op0, *Op1; + Instruction *Ext0, *Ext1; + const CmpInst::Predicate Pred = Cmp.getPredicate(); + if (match(Add, + m_Add(m_CombineAnd(m_Instruction(Ext0), m_ZExtOrSExt(m_Value(Op0))), + m_CombineAnd(m_Instruction(Ext1), + m_ZExtOrSExt(m_Value(Op1))))) && + Op0->getType()->isIntOrIntVectorTy(1) && + Op1->getType()->isIntOrIntVectorTy(1)) { + unsigned BW = C.getBitWidth(); + std::bitset<4> Table; + auto ComputeTable = [&](bool Op0Val, bool Op1Val) { + int Res = 0; + if (Op0Val) + Res += isa<ZExtInst>(Ext0) ? 1 : -1; + if (Op1Val) + Res += isa<ZExtInst>(Ext1) ? 1 : -1; + return ICmpInst::compare(APInt(BW, Res, true), C, Pred); + }; + + Table[0] = ComputeTable(false, false); + Table[1] = ComputeTable(false, true); + Table[2] = ComputeTable(true, false); + Table[3] = ComputeTable(true, true); + if (auto *Cond = + createLogicFromTable(Table, Op0, Op1, Builder, Add->hasOneUse())) + return replaceInstUsesWith(Cmp, Cond); + } const APInt *C2; if (Cmp.isEquality() || !match(Y, m_APInt(C2))) return nullptr; // Fold icmp pred (add X, C2), C. - Value *X = Add->getOperand(0); Type *Ty = Add->getType(); - const CmpInst::Predicate Pred = Cmp.getPredicate(); // If the add does not wrap, we can always adjust the compare by subtracting // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE @@ -3172,18 +3229,6 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) { } } - // Test to see if the operands of the icmp are casted versions of other - // values. If the ptr->ptr cast can be stripped off both arguments, do so. - if (DstType->isPointerTy() && (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { - // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast - // so eliminate it as well. - if (auto *BC2 = dyn_cast<BitCastInst>(Op1)) - Op1 = BC2->getOperand(0); - - Op1 = Builder.CreateBitCast(Op1, SrcType); - return new ICmpInst(Pred, BCSrcOp, Op1); - } - const APInt *C; if (!match(Cmp.getOperand(1), m_APInt(C)) || !DstType->isIntegerTy() || !SrcType->isIntOrIntVectorTy()) @@ -3196,10 +3241,12 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) { // icmp eq/ne (bitcast (not X) to iN), -1 --> icmp eq/ne (bitcast X to iN), 0 // Example: are all elements equal? --> are zero elements not equal? 
// TODO: Try harder to reduce compare of 2 freely invertible operands? - if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse() && - isFreeToInvert(BCSrcOp, BCSrcOp->hasOneUse())) { - Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), DstType); - return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType)); + if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse()) { + if (Value *NotBCSrcOp = + getFreelyInverted(BCSrcOp, BCSrcOp->hasOneUse(), &Builder)) { + Value *Cast = Builder.CreateBitCast(NotBCSrcOp, DstType); + return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType)); + } } // If this is checking if all elements of an extended vector are clear or not, @@ -3878,21 +3925,9 @@ Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) { return nullptr; switch (LHSI->getOpcode()) { - case Instruction::GetElementPtr: - // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null - if (RHSC->isNullValue() && - cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices()) - return new ICmpInst( - I.getPredicate(), LHSI->getOperand(0), - Constant::getNullValue(LHSI->getOperand(0)->getType())); - break; case Instruction::PHI: - // Only fold icmp into the PHI if the phi and icmp are in the same - // block. If in the same block, we're encouraging jump threading. If - // not, we are just pessimizing the code by making an i1 phi. - if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI))) - return NV; + if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI))) + return NV; break; case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 @@ -4243,7 +4278,12 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, /*isNUW=*/false, SQ.getWithInstruction(&I))); if (!NewShAmt) return nullptr; - NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy); + if (NewShAmt->getType() != WidestTy) { + NewShAmt = + ConstantFoldCastOperand(Instruction::ZExt, NewShAmt, WidestTy, SQ.DL); + if (!NewShAmt) + return nullptr; + } unsigned WidestBitWidth = WidestTy->getScalarSizeInBits(); // Is the new shift amount smaller than the bit width? @@ -4424,6 +4464,65 @@ static Instruction *foldICmpXNegX(ICmpInst &I, return nullptr; } +static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q, + InstCombinerImpl &IC) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A; + // Normalize and operand as operand 0. + CmpInst::Predicate Pred = I.getPredicate(); + if (match(Op1, m_c_And(m_Specific(Op0), m_Value()))) { + std::swap(Op0, Op1); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + if (!match(Op0, m_c_And(m_Specific(Op1), m_Value(A)))) + return nullptr; + + // (icmp (X & Y) u< X --> (X & Y) != X + if (Pred == ICmpInst::ICMP_ULT) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + + // (icmp (X & Y) u>= X --> (X & Y) == X + if (Pred == ICmpInst::ICMP_UGE) + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); + + return nullptr; +} + +static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q, + InstCombinerImpl &IC) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A; + + // Normalize or operand as operand 0. 
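  // Illustrative sketch of the folds below (assumed IR, not part of this
  // patch): X is always u<= (X | Y), so the remaining cases collapse to
  // equality checks:
  //   icmp ule (or i8 %x, %y), %x --> icmp eq (or i8 %x, %y), %x
  //   icmp ugt (or i8 %x, %y), %x --> icmp ne (or i8 %x, %y), %x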
+ CmpInst::Predicate Pred = I.getPredicate(); + if (match(Op1, m_c_Or(m_Specific(Op0), m_Value(A)))) { + std::swap(Op0, Op1); + Pred = ICmpInst::getSwappedPredicate(Pred); + } else if (!match(Op0, m_c_Or(m_Specific(Op1), m_Value(A)))) { + return nullptr; + } + + // icmp (X | Y) u<= X --> (X | Y) == X + if (Pred == ICmpInst::ICMP_ULE) + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); + + // icmp (X | Y) u> X --> (X | Y) != X + if (Pred == ICmpInst::ICMP_UGT) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + + if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) { + // icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible + if (Value *NotOp1 = + IC.getFreelyInverted(Op1, Op1->hasOneUse(), &IC.Builder)) + return new ICmpInst(Pred, IC.Builder.CreateAnd(A, NotOp1), + Constant::getNullValue(Op1->getType())); + // icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1 if X is freely invertible. + if (Value *NotA = IC.getFreelyInverted(A, A->hasOneUse(), &IC.Builder)) + return new ICmpInst(Pred, IC.Builder.CreateOr(Op1, NotA), + Constant::getAllOnesValue(Op1->getType())); + } + return nullptr; +} + static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q, InstCombinerImpl &IC) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A; @@ -4746,6 +4845,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, if (Instruction * R = foldICmpXorXX(I, Q, *this)) return R; + if (Instruction *R = foldICmpOrXX(I, Q, *this)) + return R; { // Try to remove shared multiplier from comparison: @@ -4915,6 +5016,9 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder)) return replaceInstUsesWith(I, V); + if (Instruction *R = foldICmpAndXX(I, Q, *this)) + return R; + if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder)) return replaceInstUsesWith(I, V); @@ -4924,88 +5028,153 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, return nullptr; } -/// Fold icmp Pred min|max(X, Y), X. -static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) { - ICmpInst::Predicate Pred = Cmp.getPredicate(); - Value *Op0 = Cmp.getOperand(0); - Value *X = Cmp.getOperand(1); - - // Canonicalize minimum or maximum operand to LHS of the icmp. - if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) || - match(X, m_c_SMax(m_Specific(Op0), m_Value())) || - match(X, m_c_UMin(m_Specific(Op0), m_Value())) || - match(X, m_c_UMax(m_Specific(Op0), m_Value()))) { - std::swap(Op0, X); - Pred = Cmp.getSwappedPredicate(); - } - - Value *Y; - if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) { - // smin(X, Y) == X --> X s<= Y - // smin(X, Y) s>= X --> X s<= Y - if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE) - return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); - - // smin(X, Y) != X --> X s> Y - // smin(X, Y) s< X --> X s> Y - if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT) - return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); - - // These cases should be handled in InstSimplify: - // smin(X, Y) s<= X --> true - // smin(X, Y) s> X --> false +/// Fold icmp Pred min|max(X, Y), Z. 
+Instruction * +InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I, + MinMaxIntrinsic *MinMax, Value *Z, + ICmpInst::Predicate Pred) { + Value *X = MinMax->getLHS(); + Value *Y = MinMax->getRHS(); + if (ICmpInst::isSigned(Pred) && !MinMax->isSigned()) return nullptr; - } - - if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) { - // smax(X, Y) == X --> X s>= Y - // smax(X, Y) s<= X --> X s>= Y - if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE) - return new ICmpInst(ICmpInst::ICMP_SGE, X, Y); - - // smax(X, Y) != X --> X s< Y - // smax(X, Y) s> X --> X s< Y - if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT) - return new ICmpInst(ICmpInst::ICMP_SLT, X, Y); - - // These cases should be handled in InstSimplify: - // smax(X, Y) s>= X --> true - // smax(X, Y) s< X --> false + if (ICmpInst::isUnsigned(Pred) && MinMax->isSigned()) return nullptr; + SimplifyQuery Q = SQ.getWithInstruction(&I); + auto IsCondKnownTrue = [](Value *Val) -> std::optional<bool> { + if (!Val) + return std::nullopt; + if (match(Val, m_One())) + return true; + if (match(Val, m_Zero())) + return false; + return std::nullopt; + }; + auto CmpXZ = IsCondKnownTrue(simplifyICmpInst(Pred, X, Z, Q)); + auto CmpYZ = IsCondKnownTrue(simplifyICmpInst(Pred, Y, Z, Q)); + if (!CmpXZ.has_value() && !CmpYZ.has_value()) + return nullptr; + if (!CmpXZ.has_value()) { + std::swap(X, Y); + std::swap(CmpXZ, CmpYZ); } - if (match(Op0, m_c_UMin(m_Specific(X), m_Value(Y)))) { - // umin(X, Y) == X --> X u<= Y - // umin(X, Y) u>= X --> X u<= Y - if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_UGE) - return new ICmpInst(ICmpInst::ICMP_ULE, X, Y); - - // umin(X, Y) != X --> X u> Y - // umin(X, Y) u< X --> X u> Y - if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT) - return new ICmpInst(ICmpInst::ICMP_UGT, X, Y); + auto FoldIntoCmpYZ = [&]() -> Instruction * { + if (CmpYZ.has_value()) + return replaceInstUsesWith(I, ConstantInt::getBool(I.getType(), *CmpYZ)); + return ICmpInst::Create(Instruction::ICmp, Pred, Y, Z); + }; - // These cases should be handled in InstSimplify: - // umin(X, Y) u<= X --> true - // umin(X, Y) u> X --> false - return nullptr; + switch (Pred) { + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: { + // If X == Z: + // Expr Result + // min(X, Y) == Z X <= Y + // max(X, Y) == Z X >= Y + // min(X, Y) != Z X > Y + // max(X, Y) != Z X < Y + if ((Pred == ICmpInst::ICMP_EQ) == *CmpXZ) { + ICmpInst::Predicate NewPred = + ICmpInst::getNonStrictPredicate(MinMax->getPredicate()); + if (Pred == ICmpInst::ICMP_NE) + NewPred = ICmpInst::getInversePredicate(NewPred); + return ICmpInst::Create(Instruction::ICmp, NewPred, X, Y); + } + // Otherwise (X != Z): + ICmpInst::Predicate NewPred = MinMax->getPredicate(); + auto MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q)); + if (!MinMaxCmpXZ.has_value()) { + std::swap(X, Y); + std::swap(CmpXZ, CmpYZ); + // Re-check pre-condition X != Z + if (!CmpXZ.has_value() || (Pred == ICmpInst::ICMP_EQ) == *CmpXZ) + break; + MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q)); + } + if (!MinMaxCmpXZ.has_value()) + break; + if (*MinMaxCmpXZ) { + // Expr Fact Result + // min(X, Y) == Z X < Z false + // max(X, Y) == Z X > Z false + // min(X, Y) != Z X < Z true + // max(X, Y) != Z X > Z true + return replaceInstUsesWith( + I, ConstantInt::getBool(I.getType(), Pred == ICmpInst::ICMP_NE)); + } else { + // Expr Fact Result + // min(X, Y) == Z X > Z Y == Z + // max(X, Y) == Z X < Z Y == Z + // min(X, Y) != Z X > Z Y != Z + // max(X, Y) != Z X < Z Y != 
Z + return FoldIntoCmpYZ(); + } + break; + } + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGE: { + bool IsSame = MinMax->getPredicate() == ICmpInst::getStrictPredicate(Pred); + if (*CmpXZ) { + if (IsSame) { + // Expr Fact Result + // min(X, Y) < Z X < Z true + // min(X, Y) <= Z X <= Z true + // max(X, Y) > Z X > Z true + // max(X, Y) >= Z X >= Z true + return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + } else { + // Expr Fact Result + // max(X, Y) < Z X < Z Y < Z + // max(X, Y) <= Z X <= Z Y <= Z + // min(X, Y) > Z X > Z Y > Z + // min(X, Y) >= Z X >= Z Y >= Z + return FoldIntoCmpYZ(); + } + } else { + if (IsSame) { + // Expr Fact Result + // min(X, Y) < Z X >= Z Y < Z + // min(X, Y) <= Z X > Z Y <= Z + // max(X, Y) > Z X <= Z Y > Z + // max(X, Y) >= Z X < Z Y >= Z + return FoldIntoCmpYZ(); + } else { + // Expr Fact Result + // max(X, Y) < Z X >= Z false + // max(X, Y) <= Z X > Z false + // min(X, Y) > Z X <= Z false + // min(X, Y) >= Z X < Z false + return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + } + } + break; + } + default: + break; } - if (match(Op0, m_c_UMax(m_Specific(X), m_Value(Y)))) { - // umax(X, Y) == X --> X u>= Y - // umax(X, Y) u<= X --> X u>= Y - if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_ULE) - return new ICmpInst(ICmpInst::ICMP_UGE, X, Y); + return nullptr; +} +Instruction *InstCombinerImpl::foldICmpWithMinMax(ICmpInst &Cmp) { + ICmpInst::Predicate Pred = Cmp.getPredicate(); + Value *Lhs = Cmp.getOperand(0); + Value *Rhs = Cmp.getOperand(1); - // umax(X, Y) != X --> X u< Y - // umax(X, Y) u> X --> X u< Y - if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_UGT) - return new ICmpInst(ICmpInst::ICMP_ULT, X, Y); + if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Lhs)) { + if (Instruction *Res = foldICmpWithMinMaxImpl(Cmp, MinMax, Rhs, Pred)) + return Res; + } - // These cases should be handled in InstSimplify: - // umax(X, Y) u>= X --> true - // umax(X, Y) u< X --> false - return nullptr; + if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Rhs)) { + if (Instruction *Res = foldICmpWithMinMaxImpl( + Cmp, MinMax, Lhs, ICmpInst::getSwappedPredicate(Pred))) + return Res; } return nullptr; @@ -5173,35 +5342,6 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType())); } - // Test if 2 values have different or same signbits: - // (X u>> BitWidth - 1) == zext (Y s> -1) --> (X ^ Y) < 0 - // (X u>> BitWidth - 1) != zext (Y s> -1) --> (X ^ Y) > -1 - // (X s>> BitWidth - 1) == sext (Y s> -1) --> (X ^ Y) < 0 - // (X s>> BitWidth - 1) != sext (Y s> -1) --> (X ^ Y) > -1 - Instruction *ExtI; - if (match(Op1, m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(A)))) && - (Op0->hasOneUse() || Op1->hasOneUse())) { - unsigned OpWidth = Op0->getType()->getScalarSizeInBits(); - Instruction *ShiftI; - Value *X, *Y; - ICmpInst::Predicate Pred2; - if (match(Op0, m_CombineAnd(m_Instruction(ShiftI), - m_Shr(m_Value(X), - m_SpecificIntAllowUndef(OpWidth - 1)))) && - match(A, m_ICmp(Pred2, m_Value(Y), m_AllOnes())) && - Pred2 == ICmpInst::ICMP_SGT && X->getType() == Y->getType()) { - unsigned ExtOpc = ExtI->getOpcode(); - unsigned ShiftOpc = ShiftI->getOpcode(); - if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) || - (ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) { - 
Value *Xor = Builder.CreateXor(X, Y, "xor.signbits"); - Value *R = (Pred == ICmpInst::ICMP_EQ) ? Builder.CreateIsNeg(Xor) - : Builder.CreateIsNotNeg(Xor); - return replaceInstUsesWith(I, R); - } - } - } - // (A >> C) == (B >> C) --> (A^B) u< (1 << C) // For lshr and ashr pairs. const APInt *AP1, *AP2; @@ -5307,6 +5447,40 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { Pred, A, Builder.CreateIntrinsic(Op0->getType(), Intrinsic::fshl, {A, A, B})); + // Canonicalize: + // icmp eq/ne OneUse(A ^ Cst), B --> icmp eq/ne (A ^ B), Cst + Constant *Cst; + if (match(&I, m_c_ICmp(PredUnused, + m_OneUse(m_Xor(m_Value(A), m_ImmConstant(Cst))), + m_CombineAnd(m_Value(B), m_Unless(m_ImmConstant()))))) + return new ICmpInst(Pred, Builder.CreateXor(A, B), Cst); + + { + // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2) + auto m_Matcher = + m_CombineOr(m_CombineOr(m_c_Add(m_Value(B), m_Deferred(A)), + m_c_Xor(m_Value(B), m_Deferred(A))), + m_Sub(m_Value(B), m_Deferred(A))); + std::optional<bool> IsZero = std::nullopt; + if (match(&I, m_c_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)), + m_Deferred(A)))) + IsZero = false; + // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0) + else if (match(&I, + m_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)), + m_Zero()))) + IsZero = true; + + if (IsZero && isKnownToBeAPowerOfTwo(A, /* OrZero */ true, /*Depth*/ 0, &I)) + // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2) + // -> (icmp eq/ne (and X, P2), 0) + // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0) + // -> (icmp eq/ne (and X, P2), P2) + return new ICmpInst(Pred, Builder.CreateAnd(B, A), + *IsZero ? A + : ConstantInt::getNullValue(A->getType())); + } + return nullptr; } @@ -5383,8 +5557,8 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) { // icmp Pred (ext X), (ext Y) Value *Y; if (match(ICmp.getOperand(1), m_ZExtOrSExt(m_Value(Y)))) { - bool IsZext0 = isa<ZExtOperator>(ICmp.getOperand(0)); - bool IsZext1 = isa<ZExtOperator>(ICmp.getOperand(1)); + bool IsZext0 = isa<ZExtInst>(ICmp.getOperand(0)); + bool IsZext1 = isa<ZExtInst>(ICmp.getOperand(1)); if (IsZext0 != IsZext1) { // If X and Y and both i1 @@ -5396,11 +5570,16 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) { return new ICmpInst(ICmp.getPredicate(), Builder.CreateOr(X, Y), Constant::getNullValue(X->getType())); - // If we have mismatched casts, treat the zext of a non-negative source as - // a sext to simulate matching casts. Otherwise, we are done. - // TODO: Can we handle some predicates (equality) without non-negative? - if ((IsZext0 && isKnownNonNegative(X, DL, 0, &AC, &ICmp, &DT)) || - (IsZext1 && isKnownNonNegative(Y, DL, 0, &AC, &ICmp, &DT))) + // If we have mismatched casts and zext has the nneg flag, we can + // treat the "zext nneg" as "sext". Otherwise, we cannot fold and quit. + + auto *NonNegInst0 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(0)); + auto *NonNegInst1 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(1)); + + bool IsNonNeg0 = NonNegInst0 && NonNegInst0->hasNonNeg(); + bool IsNonNeg1 = NonNegInst1 && NonNegInst1->hasNonNeg(); + + if ((IsZext0 && IsNonNeg0) || (IsZext1 && IsNonNeg1)) IsSignedExt = true; else return nullptr; @@ -5442,25 +5621,20 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) { if (!C) return nullptr; - // Compute the constant that would happen if we truncated to SrcTy then - // re-extended to DestTy. + // If a lossless truncate is possible... 
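+  // (Illustrative sketch, not part of the patch: for
+  //    icmp slt (sext i8 %x to i32), 100
+  //  truncating 100 to i8 and sign-extending it back reproduces 100, so the
+  //  compare can instead be performed directly as `icmp slt i8 %x, 100`.)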
Type *SrcTy = CastOp0->getSrcTy(); - Type *DestTy = CastOp0->getDestTy(); - Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy); - Constant *Res2 = ConstantExpr::getCast(CastOp0->getOpcode(), Res1, DestTy); - - // If the re-extended constant didn't change... - if (Res2 == C) { + Constant *Res = getLosslessTrunc(C, SrcTy, CastOp0->getOpcode()); + if (Res) { if (ICmp.isEquality()) - return new ICmpInst(ICmp.getPredicate(), X, Res1); + return new ICmpInst(ICmp.getPredicate(), X, Res); // A signed comparison of sign extended values simplifies into a // signed comparison. if (IsSignedExt && IsSignedCmp) - return new ICmpInst(ICmp.getPredicate(), X, Res1); + return new ICmpInst(ICmp.getPredicate(), X, Res); // The other three cases all fold into an unsigned comparison. - return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res1); + return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res); } // The re-extended constant changed, partly changed (in the case of a vector), @@ -5518,13 +5692,8 @@ Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) { Value *NewOp1 = nullptr; if (auto *PtrToIntOp1 = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) { Value *PtrSrc = PtrToIntOp1->getOperand(0); - if (PtrSrc->getType()->getPointerAddressSpace() == - Op0Src->getType()->getPointerAddressSpace()) { + if (PtrSrc->getType() == Op0Src->getType()) NewOp1 = PtrToIntOp1->getOperand(0); - // If the pointer types don't match, insert a bitcast. - if (Op0Src->getType() != NewOp1->getType()) - NewOp1 = Builder.CreateBitCast(NewOp1, Op0Src->getType()); - } } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) { NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy); } @@ -5641,22 +5810,20 @@ bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, /// \returns Instruction which must replace the compare instruction, NULL if no /// replacement required. static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, - Value *OtherVal, + const APInt *OtherVal, InstCombinerImpl &IC) { // Don't bother doing this transformation for pointers, don't do it for // vectors. if (!isa<IntegerType>(MulVal->getType())) return nullptr; - assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal); - assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal); auto *MulInstr = dyn_cast<Instruction>(MulVal); if (!MulInstr) return nullptr; assert(MulInstr->getOpcode() == Instruction::Mul); - auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)), - *RHS = cast<ZExtOperator>(MulInstr->getOperand(1)); + auto *LHS = cast<ZExtInst>(MulInstr->getOperand(0)), + *RHS = cast<ZExtInst>(MulInstr->getOperand(1)); assert(LHS->getOpcode() == Instruction::ZExt); assert(RHS->getOpcode() == Instruction::ZExt); Value *A = LHS->getOperand(0), *B = RHS->getOperand(0); @@ -5709,70 +5876,26 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, // Recognize patterns switch (I.getPredicate()) { - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_NE: - // Recognize pattern: - // mulval = mul(zext A, zext B) - // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits. 
-    ConstantInt *CI;
-    Value *ValToMask;
-    if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
-      if (ValToMask != MulVal)
-        return nullptr;
-      const APInt &CVal = CI->getValue() + 1;
-      if (CVal.isPowerOf2()) {
-        unsigned MaskWidth = CVal.logBase2();
-        if (MaskWidth == MulWidth)
-          break; // Recognized
-      }
-    }
-    return nullptr;
-
-  case ICmpInst::ICMP_UGT:
+  case ICmpInst::ICMP_UGT: {
     // Recognize pattern:
     //   mulval = mul(zext A, zext B)
     //   cmp ugt mulval, max
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
-      APInt MaxVal = APInt::getMaxValue(MulWidth);
-      MaxVal = MaxVal.zext(CI->getBitWidth());
-      if (MaxVal.eq(CI->getValue()))
-        break; // Recognized
-    }
-    return nullptr;
-
-  case ICmpInst::ICMP_UGE:
-    // Recognize pattern:
-    //   mulval = mul(zext A, zext B)
-    //   cmp uge mulval, max+1
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
-      APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
-      if (MaxVal.eq(CI->getValue()))
-        break; // Recognized
-    }
-    return nullptr;
-
-  case ICmpInst::ICMP_ULE:
-    // Recognize pattern:
-    //   mulval = mul(zext A, zext B)
-    //   cmp ule mulval, max
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
-      APInt MaxVal = APInt::getMaxValue(MulWidth);
-      MaxVal = MaxVal.zext(CI->getBitWidth());
-      if (MaxVal.eq(CI->getValue()))
-        break; // Recognized
-    }
+    APInt MaxVal = APInt::getMaxValue(MulWidth);
+    MaxVal = MaxVal.zext(OtherVal->getBitWidth());
+    if (MaxVal.eq(*OtherVal))
+      break; // Recognized
     return nullptr;
+  }

-  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_ULT: {
     // Recognize pattern:
     //   mulval = mul(zext A, zext B)
     //   cmp ult mulval, max + 1
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
-      APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
-      if (MaxVal.eq(CI->getValue()))
-        break; // Recognized
-    }
+    APInt MaxVal = APInt::getOneBitSet(OtherVal->getBitWidth(), MulWidth);
+    if (MaxVal.eq(*OtherVal))
+      break; // Recognized
     return nullptr;
+  }

   default:
     return nullptr;
@@ -5798,7 +5921,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
   if (MulVal->hasNUsesOrMore(2)) {
     Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
     for (User *U : make_early_inc_range(MulVal->users())) {
-      if (U == &I || U == OtherVal)
+      if (U == &I)
        continue;
      if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
        if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
@@ -5819,34 +5942,10 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
         IC.addToWorklist(cast<Instruction>(U));
     }
   }
-  if (isa<Instruction>(OtherVal))
-    IC.addToWorklist(cast<Instruction>(OtherVal));

   // The original icmp gets replaced with the overflow value, maybe inverted
   // depending on predicate.
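+  // (Illustrative: with an i16 multiply widened through i32,
+  //    icmp ugt i32 %m, 65535   maps directly to the extracted overflow bit,
+  //    icmp ult i32 %m, 65536   is its negation and therefore gets a `not`.)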
- bool Inverse = false; - switch (I.getPredicate()) { - case ICmpInst::ICMP_NE: - break; - case ICmpInst::ICMP_EQ: - Inverse = true; - break; - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: - if (I.getOperand(0) == MulVal) - break; - Inverse = true; - break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: - if (I.getOperand(1) == MulVal) - break; - Inverse = true; - break; - default: - llvm_unreachable("Unexpected predicate"); - } - if (Inverse) { + if (I.getPredicate() == ICmpInst::ICMP_ULT) { Value *Res = Builder.CreateExtractValue(Call, 1); return BinaryOperator::CreateNot(Res); } @@ -6015,13 +6114,19 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { KnownBits Op0Known(BitWidth); KnownBits Op1Known(BitWidth); - if (SimplifyDemandedBits(&I, 0, - getDemandedBitsLHSMask(I, BitWidth), - Op0Known, 0)) - return &I; + { + // Don't use dominating conditions when folding icmp using known bits. This + // may convert signed into unsigned predicates in ways that other passes + // (especially IndVarSimplify) may not be able to reliably undo. + SQ.DC = nullptr; + auto _ = make_scope_exit([&]() { SQ.DC = &DC; }); + if (SimplifyDemandedBits(&I, 0, getDemandedBitsLHSMask(I, BitWidth), + Op0Known, 0)) + return &I; - if (SimplifyDemandedBits(&I, 1, APInt::getAllOnes(BitWidth), Op1Known, 0)) - return &I; + if (SimplifyDemandedBits(&I, 1, APInt::getAllOnes(BitWidth), Op1Known, 0)) + return &I; + } // Given the known and unknown bits, compute a range that the LHS could be // in. Compute the Min, Max and RHS values based on the known bits. For the @@ -6269,57 +6374,70 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); + // icmp eq/ne X, (zext/sext (icmp eq/ne X, C)) + ICmpInst::Predicate Pred1, Pred2; const APInt *C; - if (match(I.getOperand(0), m_c_Add(m_ZExt(m_Value(X)), m_SExt(m_Value(Y)))) && - match(I.getOperand(1), m_APInt(C)) && - X->getType()->isIntOrIntVectorTy(1) && - Y->getType()->isIntOrIntVectorTy(1)) { - unsigned BitWidth = C->getBitWidth(); - Pred = I.getPredicate(); - APInt Zero = APInt::getZero(BitWidth); - APInt MinusOne = APInt::getAllOnes(BitWidth); - APInt One(BitWidth, 1); - if ((C->sgt(Zero) && Pred == ICmpInst::ICMP_SGT) || - (C->slt(Zero) && Pred == ICmpInst::ICMP_SLT)) - return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - if ((C->sgt(One) && Pred == ICmpInst::ICMP_SLT) || - (C->slt(MinusOne) && Pred == ICmpInst::ICMP_SGT)) - return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); - - if (I.getOperand(0)->hasOneUse()) { - APInt NewC = *C; - // canonicalize predicate to eq/ne - if ((*C == Zero && Pred == ICmpInst::ICMP_SLT) || - (*C != Zero && *C != MinusOne && Pred == ICmpInst::ICMP_UGT)) { - // x s< 0 in [-1, 1] --> x == -1 - // x u> 1(or any const !=0 !=-1) in [-1, 1] --> x == -1 - NewC = MinusOne; - Pred = ICmpInst::ICMP_EQ; - } else if ((*C == MinusOne && Pred == ICmpInst::ICMP_SGT) || - (*C != Zero && *C != One && Pred == ICmpInst::ICMP_ULT)) { - // x s> -1 in [-1, 1] --> x != -1 - // x u< -1 in [-1, 1] --> x != -1 - Pred = ICmpInst::ICMP_NE; - } else if (*C == Zero && Pred == ICmpInst::ICMP_SGT) { - // x s> 0 in [-1, 1] --> x == 1 - NewC = One; - Pred = ICmpInst::ICMP_EQ; - } else if (*C == One && Pred == ICmpInst::ICMP_SLT) { - // x s< 1 in [-1, 1] --> x != 1 - Pred = ICmpInst::ICMP_NE; + Instruction *ExtI; + if (match(&I, m_c_ICmp(Pred1, m_Value(X), + 
+                         m_CombineAnd(m_Instruction(ExtI),
+                                      m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X),
+                                                          m_APInt(C)))))) &&
+      ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) {
+    bool IsSExt = ExtI->getOpcode() == Instruction::SExt;
+    bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse();
+    auto CreateRangeCheck = [&] {
+      Value *CmpV1 =
+          Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType()));
+      Value *CmpV2 = Builder.CreateICmp(
+          Pred1, X, ConstantInt::getSigned(X->getType(), IsSExt ? -1 : 1));
+      return BinaryOperator::Create(
+          Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And,
+          CmpV1, CmpV2);
+    };
+    if (C->isZero()) {
+      if (Pred2 == ICmpInst::ICMP_EQ) {
+        // icmp eq X, (zext/sext (icmp eq X, 0)) --> false
+        // icmp ne X, (zext/sext (icmp eq X, 0)) --> true
+        return replaceInstUsesWith(
+            I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
+      } else if (!IsSExt || HasOneUse) {
+        // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1
+        // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1
+        // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1
+        // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X != -1
+        return CreateRangeCheck();
+      }
+    } else if (IsSExt ? C->isAllOnes() : C->isOne()) {
+      if (Pred2 == ICmpInst::ICMP_NE) {
+        // icmp eq X, (zext (icmp ne X, 1)) --> false
+        // icmp ne X, (zext (icmp ne X, 1)) --> true
+        // icmp eq X, (sext (icmp ne X, -1)) --> false
+        // icmp ne X, (sext (icmp ne X, -1)) --> true
+        return replaceInstUsesWith(
+            I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
+      } else if (!IsSExt || HasOneUse) {
+        // icmp eq X, (zext (icmp eq X, 1)) --> X == 0 || X == 1
+        // icmp ne X, (zext (icmp eq X, 1)) --> X != 0 && X != 1
+        // icmp eq X, (sext (icmp eq X, -1)) --> X == 0 || X == -1
+        // icmp ne X, (sext (icmp eq X, -1)) --> X != 0 && X != -1
+        return CreateRangeCheck();
+      }
+    } else {
+      // when C != 0 && C != 1:
+      // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0
+      // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1
+      // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0
+      // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1
+      // when C != 0 && C != -1:
+      // icmp eq X, (sext (icmp eq X, C)) --> icmp eq X, 0
+      // icmp eq X, (sext (icmp ne X, C)) --> icmp eq X, -1
+      // icmp ne X, (sext (icmp eq X, C)) --> icmp ne X, 0
+      // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1
+      return ICmpInst::Create(
+          Instruction::ICmp, Pred1, X,
+          ConstantInt::getSigned(X->getType(), Pred2 == ICmpInst::ICMP_NE
+                                                   ? (IsSExt ? -1 : 1)
+                                                   : 0));
+    }
   }

@@ -6783,6 +6901,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
   if (Instruction *Res = foldICmpUsingKnownBits(I))
     return Res;

+  if (Instruction *Res = foldICmpTruncWithTruncOrExt(I, Q))
+    return Res;
+
   // Test if the ICmpInst instruction is used exclusively by a select as
   // part of a minimum or maximum operation. If so, refrain from doing
   // any other folding. This helps out other analyses which understand
@@ -6913,38 +7034,40 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
     return Res;

   {
-    Value *A, *B;
-    // Transform (A & ~B) == 0 --> (A & B) != 0
-    // and       (A & ~B) != 0 --> (A & B) == 0
+    Value *X, *Y;
+    // Transform (X & ~Y) == 0 --> (X & Y) != 0
+    // and       (X & ~Y) != 0 --> (X & Y) == 0
     // if X is a power of 2.
-    if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
-        match(Op1, m_Zero()) &&
-        isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
-      return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
+    if (match(Op0, m_And(m_Value(X), m_Not(m_Value(Y)))) &&
+        match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(X, false, 0, &I) &&
+        I.isEquality())
+      return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(X, Y),
                           Op1);

-    // ~X < ~Y --> Y < X
-    // ~X < C -->  X > ~C
-    if (match(Op0, m_Not(m_Value(A)))) {
-      if (match(Op1, m_Not(m_Value(B))))
-        return new ICmpInst(I.getPredicate(), B, A);
-
-      const APInt *C;
-      if (match(Op1, m_APInt(C)))
-        return new ICmpInst(I.getSwappedPredicate(), A,
-                            ConstantInt::get(Op1->getType(), ~(*C)));
+    // Op0 pred Op1 -> ~Op1 pred ~Op0, if this allows us to drop an instruction.
+    if (Op0->getType()->isIntOrIntVectorTy()) {
+      bool ConsumesOp0, ConsumesOp1;
+      if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) &&
+          isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) &&
+          (ConsumesOp0 || ConsumesOp1)) {
+        Value *InvOp0 = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder);
+        Value *InvOp1 = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder);
+        assert(InvOp0 && InvOp1 &&
+               "Mismatch between isFreeToInvert and getFreelyInverted");
+        return new ICmpInst(I.getSwappedPredicate(), InvOp0, InvOp1);
+      }
     }

     Instruction *AddI = nullptr;
-    if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+    if (match(&I, m_UAddWithOverflow(m_Value(X), m_Value(Y),
                                      m_Instruction(AddI))) &&
-        isa<IntegerType>(A->getType())) {
+        isa<IntegerType>(X->getType())) {
       Value *Result;
       Constant *Overflow;
       // m_UAddWithOverflow can match patterns that do not include an explicit
       // "add" instruction, so check the opcode of the matched op.
       if (AddI->getOpcode() == Instruction::Add &&
-          OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, A, B, *AddI,
+          OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, X, Y, *AddI,
                                 Result, Overflow)) {
         replaceInstUsesWith(*AddI, Result);
         eraseInstFromFunction(*AddI);
@@ -6952,14 +7075,37 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
       }
     }

-    // (zext a) * (zext b) --> llvm.umul.with.overflow.
-    if (match(Op0, m_NUWMul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
-      if (Instruction *R = processUMulZExtIdiom(I, Op0, Op1, *this))
+    // (zext X) * (zext Y) --> llvm.umul.with.overflow.
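+    // A sketch of the idiom being matched (illustrative IR, not from this
+    // patch):
+    //   %ax  = zext i16 %a to i32
+    //   %bx  = zext i16 %b to i32
+    //   %m   = mul nuw i32 %ax, %bx
+    //   %ovf = icmp ugt i32 %m, 65535  ; true iff the i16 multiply overflows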
+ if (match(Op0, m_NUWMul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && + match(Op1, m_APInt(C))) { + if (Instruction *R = processUMulZExtIdiom(I, Op0, C, *this)) return R; } - if (match(Op1, m_NUWMul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { - if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this)) - return R; + + // Signbit test folds + // Fold (X u>> BitWidth - 1 Pred ZExt(i1)) --> X s< 0 Pred i1 + // Fold (X s>> BitWidth - 1 Pred SExt(i1)) --> X s< 0 Pred i1 + Instruction *ExtI; + if ((I.isUnsigned() || I.isEquality()) && + match(Op1, + m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(Y)))) && + Y->getType()->getScalarSizeInBits() == 1 && + (Op0->hasOneUse() || Op1->hasOneUse())) { + unsigned OpWidth = Op0->getType()->getScalarSizeInBits(); + Instruction *ShiftI; + if (match(Op0, m_CombineAnd(m_Instruction(ShiftI), + m_Shr(m_Value(X), m_SpecificIntAllowUndef( + OpWidth - 1))))) { + unsigned ExtOpc = ExtI->getOpcode(); + unsigned ShiftOpc = ShiftI->getOpcode(); + if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) || + (ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) { + Value *SLTZero = + Builder.CreateICmpSLT(X, Constant::getNullValue(X->getType())); + Value *Cmp = Builder.CreateICmp(Pred, SLTZero, Y, I.getName()); + return replaceInstUsesWith(I, Cmp); + } + } } } @@ -7177,17 +7323,14 @@ Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I, } // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or - // [0, UMAX], but it may still be fractional. See if it is fractional by - // casting the FP value to the integer value and back, checking for equality. + // [0, UMAX], but it may still be fractional. Check whether this is the case + // using the IsExact flag. // Don't do this for zero, because -0.0 is not fractional. - Constant *RHSInt = LHSUnsigned - ? ConstantExpr::getFPToUI(RHSC, IntTy) - : ConstantExpr::getFPToSI(RHSC, IntTy); + APSInt RHSInt(IntWidth, LHSUnsigned); + bool IsExact; + RHS.convertToInteger(RHSInt, APFloat::rmTowardZero, &IsExact); if (!RHS.isZero()) { - bool Equal = LHSUnsigned - ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC - : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; - if (!Equal) { + if (!IsExact) { // If we had a comparison against a fractional value, we have to adjust // the compare predicate and sometimes the value. RHSC is rounded towards // zero at this point. @@ -7253,7 +7396,7 @@ Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I, // Lower this FP comparison into an appropriate integer version of the // comparison. - return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); + return new ICmpInst(Pred, LHSI->getOperand(0), Builder.getInt(RHSInt)); } /// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary. @@ -7532,12 +7675,8 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) { if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) { switch (LHSI->getOpcode()) { case Instruction::PHI: - // Only fold fcmp into the PHI if the phi and fcmp are in the same - // block. If in the same block, we're encouraging jump threading. If - // not, we are just pessimizing the code by making an i1 phi. 
- if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI))) - return NV; + if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI))) + return NV; break; case Instruction::SIToFP: case Instruction::UIToFP: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 701579e1de48..bb620ad8d41c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" @@ -73,6 +74,10 @@ public: virtual ~InstCombinerImpl() = default; + /// Perform early cleanup and prepare the InstCombine worklist. + bool prepareWorklist(Function &F, + ReversePostOrderTraversal<BasicBlock *> &RPOT); + /// Run the combiner over the entire worklist until it is empty. /// /// \returns true if the IR is changed. @@ -93,6 +98,7 @@ public: Instruction *visitSub(BinaryOperator &I); Instruction *visitFSub(BinaryOperator &I); Instruction *visitMul(BinaryOperator &I); + Instruction *foldFMulReassoc(BinaryOperator &I); Instruction *visitFMul(BinaryOperator &I); Instruction *visitURem(BinaryOperator &I); Instruction *visitSRem(BinaryOperator &I); @@ -126,7 +132,6 @@ public: Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I); Instruction *commonCastTransforms(CastInst &CI); - Instruction *commonPointerCastTransforms(CastInst &CI); Instruction *visitTrunc(TruncInst &CI); Instruction *visitZExt(ZExtInst &Zext); Instruction *visitSExt(SExtInst &Sext); @@ -193,6 +198,44 @@ public: LoadInst *combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix = ""); + KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF, + FPClassTest Interested = fcAllFlags, + const Instruction *CtxI = nullptr, + unsigned Depth = 0) const { + return llvm::computeKnownFPClass(Val, FMF, DL, Interested, Depth, &TLI, &AC, + CtxI, &DT); + } + + KnownFPClass computeKnownFPClass(Value *Val, + FPClassTest Interested = fcAllFlags, + const Instruction *CtxI = nullptr, + unsigned Depth = 0) const { + return llvm::computeKnownFPClass(Val, DL, Interested, Depth, &TLI, &AC, + CtxI, &DT); + } + + /// Check if fmul \p MulVal, +0.0 will yield +0.0 (or signed zero is + /// ignorable). 
+ bool fmulByZeroIsZero(Value *MulVal, FastMathFlags FMF, + const Instruction *CtxI) const; + + Constant *getLosslessTrunc(Constant *C, Type *TruncTy, unsigned ExtOp) { + Constant *TruncC = ConstantExpr::getTrunc(C, TruncTy); + Constant *ExtTruncC = + ConstantFoldCastOperand(ExtOp, TruncC, C->getType(), DL); + if (ExtTruncC && ExtTruncC == C) + return TruncC; + return nullptr; + } + + Constant *getLosslessUnsignedTrunc(Constant *C, Type *TruncTy) { + return getLosslessTrunc(C, TruncTy, Instruction::ZExt); + } + + Constant *getLosslessSignedTrunc(Constant *C, Type *TruncTy) { + return getLosslessTrunc(C, TruncTy, Instruction::SExt); + } + private: bool annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI); bool isDesirableIntType(unsigned BitWidth) const; @@ -252,13 +295,15 @@ private: Instruction *transformSExtICmp(ICmpInst *Cmp, SExtInst &Sext); - bool willNotOverflowSignedAdd(const Value *LHS, const Value *RHS, + bool willNotOverflowSignedAdd(const WithCache<const Value *> &LHS, + const WithCache<const Value *> &RHS, const Instruction &CxtI) const { return computeOverflowForSignedAdd(LHS, RHS, &CxtI) == OverflowResult::NeverOverflows; } - bool willNotOverflowUnsignedAdd(const Value *LHS, const Value *RHS, + bool willNotOverflowUnsignedAdd(const WithCache<const Value *> &LHS, + const WithCache<const Value *> &RHS, const Instruction &CxtI) const { return computeOverflowForUnsignedAdd(LHS, RHS, &CxtI) == OverflowResult::NeverOverflows; @@ -387,15 +432,17 @@ private: Instruction *foldAndOrOfSelectUsingImpliedCond(Value *Op, SelectInst &SI, bool IsAnd); + Instruction *hoistFNegAboveFMulFDiv(Value *FNegOp, Instruction &FMFSource); + public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. void CreateNonTerminatorUnreachable(Instruction *InsertAt) { auto &Ctx = InsertAt->getContext(); auto *SI = new StoreInst(ConstantInt::getTrue(Ctx), - PoisonValue::get(Type::getInt1PtrTy(Ctx)), + PoisonValue::get(PointerType::getUnqual(Ctx)), /*isVolatile*/ false, Align(1)); - InsertNewInstBefore(SI, *InsertAt); + InsertNewInstBefore(SI, InsertAt->getIterator()); } /// Combiner aware instruction erasure. @@ -412,6 +459,7 @@ public: // use counts. SmallVector<Value *> Ops(I.operands()); Worklist.remove(&I); + DC.removeValue(&I); I.eraseFromParent(); for (Value *Op : Ops) Worklist.handleUseCountDecrement(Op); @@ -498,6 +546,7 @@ public: /// Tries to simplify operands to an integer instruction based on its /// demanded bits. bool SimplifyDemandedInstructionBits(Instruction &Inst); + bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, unsigned Depth = 0, @@ -535,6 +584,9 @@ public: Instruction *foldAddWithConstant(BinaryOperator &Add); + Instruction *foldSquareSumInt(BinaryOperator &I); + Instruction *foldSquareSumFP(BinaryOperator &I); + /// Try to rotate an operation below a PHI node, using PHI nodes for /// its operands. 
   Instruction *foldPHIArgOpIntoPHI(PHINode &PN);
@@ -580,6 +632,9 @@ public:
   Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
                                                   const APInt &C);
   Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ);
+  Instruction *foldICmpWithMinMaxImpl(Instruction &I, MinMaxIntrinsic *MinMax,
+                                      Value *Z, ICmpInst::Predicate Pred);
+  Instruction *foldICmpWithMinMax(ICmpInst &Cmp);
   Instruction *foldICmpEquality(ICmpInst &Cmp);
   Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
   Instruction *foldSignBitTest(ICmpInst &I);
@@ -593,6 +648,8 @@ public:
                                       ConstantInt *C);
   Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc,
                                      const APInt &C);
+  Instruction *foldICmpTruncWithTruncOrExt(ICmpInst &Cmp,
+                                           const SimplifyQuery &Q);
   Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
                                    const APInt &C);
   Instruction *foldICmpXorConstant(ICmpInst &Cmp, BinaryOperator *Xor,
@@ -667,8 +724,12 @@ public:
   bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock);
   bool removeInstructionsBeforeUnreachable(Instruction &I);
-  bool handleUnreachableFrom(Instruction *I);
-  bool handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc);
+  void addDeadEdge(BasicBlock *From, BasicBlock *To,
+                   SmallVectorImpl<BasicBlock *> &Worklist);
+  void handleUnreachableFrom(Instruction *I,
+                             SmallVectorImpl<BasicBlock *> &Worklist);
+  void handlePotentiallyDeadBlocks(SmallVectorImpl<BasicBlock *> &Worklist);
+  void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc);
   void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser = nullptr);
 };

@@ -679,16 +740,11 @@ class Negator final {
   using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
   BuilderTy Builder;

-  const DataLayout &DL;
-  AssumptionCache &AC;
-  const DominatorTree &DT;
-
   const bool IsTrulyNegation;

   SmallDenseMap<Value *, Value *> NegationsCache;

-  Negator(LLVMContext &C, const DataLayout &DL, AssumptionCache &AC,
-          const DominatorTree &DT, bool IsTrulyNegation);
+  Negator(LLVMContext &C, const DataLayout &DL, bool IsTrulyNegation);

 #if LLVM_ENABLE_STATS
   unsigned NumValuesVisitedInThisNegator = 0;
@@ -700,13 +756,13 @@ class Negator final {

   std::array<Value *, 2> getSortedOperandsOfBinOp(Instruction *I);

-  [[nodiscard]] Value *visitImpl(Value *V, unsigned Depth);
+  [[nodiscard]] Value *visitImpl(Value *V, bool IsNSW, unsigned Depth);

-  [[nodiscard]] Value *negate(Value *V, unsigned Depth);
+  [[nodiscard]] Value *negate(Value *V, bool IsNSW, unsigned Depth);

   /// Recurse depth-first and attempt to sink the negation.
   /// FIXME: use worklist?
-  [[nodiscard]] std::optional<Result> run(Value *Root);
+  [[nodiscard]] std::optional<Result> run(Value *Root, bool IsNSW);

   Negator(const Negator &) = delete;
   Negator(Negator &&) = delete;
@@ -716,7 +772,7 @@ class Negator final {

 public:
   /// Attempt to negate \p Root. Returns nullptr if negation can't be performed,
   /// otherwise returns the negated value.
- [[nodiscard]] static Value *Negate(bool LHSIsZero, Value *Root, + [[nodiscard]] static Value *Negate(bool LHSIsZero, bool IsNSW, Value *Root, InstCombinerImpl &IC); }; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 6aa20ee26b9a..b72b68c68d98 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -36,6 +36,13 @@ static cl::opt<unsigned> MaxCopiedFromConstantUsers( cl::desc("Maximum users to visit in copy from constant transform"), cl::Hidden); +namespace llvm { +cl::opt<bool> EnableInferAlignmentPass( + "enable-infer-alignment-pass", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable the InferAlignment pass, disabling alignment inference in " + "InstCombine")); +} + /// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived) /// pointer to an alloca. Ignore any reads of the pointer, return false if we /// see any stores or other unknown uses. If we see pointer arithmetic, keep @@ -224,7 +231,7 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, Value *Idx[2] = {NullIdx, NullIdx}; Instruction *GEP = GetElementPtrInst::CreateInBounds( NewTy, New, Idx, New->getName() + ".sub"); - IC.InsertNewInstBefore(GEP, *It); + IC.InsertNewInstBefore(GEP, It); // Now make everything use the getelementptr instead of the original // allocation. @@ -380,7 +387,7 @@ void PointerReplacer::replace(Instruction *I) { NewI->takeName(LT); copyMetadataForLoad(*NewI, *LT); - IC.InsertNewInstWith(NewI, *LT); + IC.InsertNewInstWith(NewI, LT->getIterator()); IC.replaceInstUsesWith(*LT, NewI); WorkMap[LT] = NewI; } else if (auto *PHI = dyn_cast<PHINode>(I)) { @@ -398,7 +405,7 @@ void PointerReplacer::replace(Instruction *I) { Indices.append(GEP->idx_begin(), GEP->idx_end()); auto *NewI = GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices); - IC.InsertNewInstWith(NewI, *GEP); + IC.InsertNewInstWith(NewI, GEP->getIterator()); NewI->takeName(GEP); WorkMap[GEP] = NewI; } else if (auto *BC = dyn_cast<BitCastInst>(I)) { @@ -407,14 +414,14 @@ void PointerReplacer::replace(Instruction *I) { auto *NewT = PointerType::get(BC->getType()->getContext(), V->getType()->getPointerAddressSpace()); auto *NewI = new BitCastInst(V, NewT); - IC.InsertNewInstWith(NewI, *BC); + IC.InsertNewInstWith(NewI, BC->getIterator()); NewI->takeName(BC); WorkMap[BC] = NewI; } else if (auto *SI = dyn_cast<SelectInst>(I)) { auto *NewSI = SelectInst::Create( SI->getCondition(), getReplacement(SI->getTrueValue()), getReplacement(SI->getFalseValue()), SI->getName(), nullptr, SI); - IC.InsertNewInstWith(NewSI, *SI); + IC.InsertNewInstWith(NewSI, SI->getIterator()); NewSI->takeName(SI); WorkMap[SI] = NewSI; } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) { @@ -449,7 +456,7 @@ void PointerReplacer::replace(Instruction *I) { ASC->getType()->getPointerAddressSpace()) { auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), ""); NewI->takeName(ASC); - IC.InsertNewInstWith(NewI, *ASC); + IC.InsertNewInstWith(NewI, ASC->getIterator()); NewV = NewI; } IC.replaceInstUsesWith(*ASC, NewV); @@ -507,8 +514,6 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) { // types. 
const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign()); EntryAI->setAlignment(MaxAlign); - if (AI.getType() != EntryAI->getType()) - return new BitCastInst(EntryAI, AI.getType()); return replaceInstUsesWith(AI, EntryAI); } } @@ -534,13 +539,11 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) { LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace(); - auto *DestTy = PointerType::get(AI.getAllocatedType(), SrcAddrSpace); if (AI.getAddressSpace() == SrcAddrSpace) { for (Instruction *Delete : ToDelete) eraseInstFromFunction(*Delete); - Value *Cast = Builder.CreateBitCast(TheSrc, DestTy); - Instruction *NewI = replaceInstUsesWith(AI, Cast); + Instruction *NewI = replaceInstUsesWith(AI, TheSrc); eraseInstFromFunction(*Copy); ++NumGlobalCopies; return NewI; @@ -551,8 +554,7 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) { for (Instruction *Delete : ToDelete) eraseInstFromFunction(*Delete); - Value *Cast = Builder.CreateBitCast(TheSrc, DestTy); - PtrReplacer.replacePointer(Cast); + PtrReplacer.replacePointer(TheSrc); ++NumGlobalCopies; } } @@ -582,16 +584,9 @@ LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy, assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) && "can't fold an atomic load to requested type"); - Value *Ptr = LI.getPointerOperand(); - unsigned AS = LI.getPointerAddressSpace(); - Type *NewPtrTy = NewTy->getPointerTo(AS); - Value *NewPtr = nullptr; - if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) && - NewPtr->getType() == NewPtrTy)) - NewPtr = Builder.CreateBitCast(Ptr, NewPtrTy); - - LoadInst *NewLoad = Builder.CreateAlignedLoad( - NewTy, NewPtr, LI.getAlign(), LI.isVolatile(), LI.getName() + Suffix); + LoadInst *NewLoad = + Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(), + LI.isVolatile(), LI.getName() + Suffix); NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); copyMetadataForLoad(*NewLoad, LI); return NewLoad; @@ -606,13 +601,11 @@ static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI, "can't fold an atomic store of requested type"); Value *Ptr = SI.getPointerOperand(); - unsigned AS = SI.getPointerAddressSpace(); SmallVector<std::pair<unsigned, MDNode *>, 8> MD; SI.getAllMetadata(MD); - StoreInst *NewStore = IC.Builder.CreateAlignedStore( - V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), - SI.getAlign(), SI.isVolatile()); + StoreInst *NewStore = + IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile()); NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); for (const auto &MDPair : MD) { unsigned ID = MDPair.first; @@ -655,29 +648,6 @@ static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI, return NewStore; } -/// Returns true if instruction represent minmax pattern like: -/// select ((cmp load V1, load V2), V1, V2). -static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) { - assert(V->getType()->isPointerTy() && "Expected pointer type."); - // Ignore possible ty* to ixx* bitcast. - V = InstCombiner::peekThroughBitcast(V); - // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax - // pattern. 
- CmpInst::Predicate Pred; - Instruction *L1; - Instruction *L2; - Value *LHS; - Value *RHS; - if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)), - m_Value(LHS), m_Value(RHS)))) - return false; - LoadTy = L1->getType(); - return (match(L1, m_Load(m_Specific(LHS))) && - match(L2, m_Load(m_Specific(RHS)))) || - (match(L1, m_Load(m_Specific(RHS))) && - match(L2, m_Load(m_Specific(LHS)))); -} - /// Combine loads to match the type of their uses' value after looking /// through intervening bitcasts. /// @@ -818,7 +788,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) { return nullptr; const DataLayout &DL = IC.getDataLayout(); - auto EltSize = DL.getTypeAllocSize(ET); + TypeSize EltSize = DL.getTypeAllocSize(ET); const auto Align = LI.getAlign(); auto *Addr = LI.getPointerOperand(); @@ -826,7 +796,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) { auto *Zero = ConstantInt::get(IdxType, 0); Value *V = PoisonValue::get(T); - uint64_t Offset = 0; + TypeSize Offset = TypeSize::get(0, ET->isScalableTy()); for (uint64_t i = 0; i < NumElements; i++) { Value *Indices[2] = { Zero, @@ -834,9 +804,9 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) { }; auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), Name + ".elt"); + auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue()); auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr, - commonAlignment(Align, Offset), - Name + ".unpack"); + EltAlign, Name + ".unpack"); L->setAAMetadata(LI.getAAMetadata()); V = IC.Builder.CreateInsertValue(V, L, i); Offset += EltSize; @@ -971,7 +941,7 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC, Type *SourceElementType = GEPI->getSourceElementType(); // Size information about scalable vectors is not available, so we cannot // deduce whether indexing at n is undefined behaviour or not. Bail out. - if (isa<ScalableVectorType>(SourceElementType)) + if (SourceElementType->isScalableTy()) return false; Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops); @@ -1020,7 +990,7 @@ static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr, Instruction *NewGEPI = GEPI->clone(); NewGEPI->setOperand(Idx, ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0)); - IC.InsertNewInstBefore(NewGEPI, *GEPI); + IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator()); return NewGEPI; } } @@ -1062,11 +1032,13 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) { if (Instruction *Res = combineLoadToOperationType(*this, LI)) return Res; - // Attempt to improve the alignment. - Align KnownAlign = getOrEnforceKnownAlignment( - Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT); - if (KnownAlign > LI.getAlign()) - LI.setAlignment(KnownAlign); + if (!EnableInferAlignmentPass) { + // Attempt to improve the alignment. + Align KnownAlign = getOrEnforceKnownAlignment( + Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT); + if (KnownAlign > LI.getAlign()) + LI.setAlignment(KnownAlign); + } // Replace GEP indices if possible. 
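   // (For example, an in-bounds variable index into a single-element array can
   // only be 0, so it may be replaced by the constant 0; an illustrative case
   // of the check below, not an exhaustive description.)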
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) @@ -1337,7 +1309,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) { return false; const DataLayout &DL = IC.getDataLayout(); - auto EltSize = DL.getTypeAllocSize(AT->getElementType()); + TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType()); const auto Align = SI.getAlign(); SmallString<16> EltName = V->getName(); @@ -1349,7 +1321,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) { auto *IdxType = Type::getInt64Ty(T->getContext()); auto *Zero = ConstantInt::get(IdxType, 0); - uint64_t Offset = 0; + TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy()); for (uint64_t i = 0; i < NumElements; i++) { Value *Indices[2] = { Zero, @@ -1358,7 +1330,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) { auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName); auto *Val = IC.Builder.CreateExtractValue(V, i, EltName); - auto EltAlign = commonAlignment(Align, Offset); + auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue()); Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign); NS->setAAMetadata(SI.getAAMetadata()); Offset += EltSize; @@ -1399,58 +1371,6 @@ static bool equivalentAddressValues(Value *A, Value *B) { return false; } -/// Converts store (bitcast (load (bitcast (select ...)))) to -/// store (load (select ...)), where select is minmax: -/// select ((cmp load V1, load V2), V1, V2). -static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC, - StoreInst &SI) { - // bitcast? - if (!match(SI.getPointerOperand(), m_BitCast(m_Value()))) - return false; - // load? integer? - Value *LoadAddr; - if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr))))) - return false; - auto *LI = cast<LoadInst>(SI.getValueOperand()); - if (!LI->getType()->isIntegerTy()) - return false; - Type *CmpLoadTy; - if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy)) - return false; - - // Make sure the type would actually change. - // This condition can be hit with chains of bitcasts. - if (LI->getType() == CmpLoadTy) - return false; - - // Make sure we're not changing the size of the load/store. - const auto &DL = IC.getDataLayout(); - if (DL.getTypeStoreSizeInBits(LI->getType()) != - DL.getTypeStoreSizeInBits(CmpLoadTy)) - return false; - - if (!all_of(LI->users(), [LI, LoadAddr](User *U) { - auto *SI = dyn_cast<StoreInst>(U); - return SI && SI->getPointerOperand() != LI && - InstCombiner::peekThroughBitcast(SI->getPointerOperand()) != - LoadAddr && - !SI->getPointerOperand()->isSwiftError(); - })) - return false; - - IC.Builder.SetInsertPoint(LI); - LoadInst *NewLI = IC.combineLoadToNewType(*LI, CmpLoadTy); - // Replace all the stores with stores of the newly loaded value. - for (auto *UI : LI->users()) { - auto *USI = cast<StoreInst>(UI); - IC.Builder.SetInsertPoint(USI); - combineStoreToNewValue(IC, *USI, NewLI); - } - IC.replaceInstUsesWith(*LI, PoisonValue::get(LI->getType())); - IC.eraseInstFromFunction(*LI); - return true; -} - Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); @@ -1459,19 +1379,18 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { if (combineStoreToValueType(*this, SI)) return eraseInstFromFunction(SI); - // Attempt to improve the alignment. 
- const Align KnownAlign = getOrEnforceKnownAlignment( - Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT); - if (KnownAlign > SI.getAlign()) - SI.setAlignment(KnownAlign); + if (!EnableInferAlignmentPass) { + // Attempt to improve the alignment. + const Align KnownAlign = getOrEnforceKnownAlignment( + Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT); + if (KnownAlign > SI.getAlign()) + SI.setAlignment(KnownAlign); + } // Try to canonicalize the stored type. if (unpackStoreToAggregate(*this, SI)) return eraseInstFromFunction(SI); - if (removeBitcastsFromLoadStoreOnMinMax(*this, SI)) - return eraseInstFromFunction(SI); - // Replace GEP indices if possible. if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) return replaceOperand(SI, 1, NewGEPI); @@ -1508,8 +1427,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { --BBI; // Don't count debug info directives, lest they affect codegen, // and we skip pointer-to-pointer bitcasts, which are NOPs. - if (BBI->isDebugOrPseudoInst() || - (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { + if (BBI->isDebugOrPseudoInst()) { ScanInsts++; continue; } @@ -1560,11 +1478,15 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { // This is a non-terminator unreachable marker. Don't remove it. if (isa<UndefValue>(Ptr)) { - // Remove all instructions after the marker and guaranteed-to-transfer - // instructions before the marker. - if (handleUnreachableFrom(SI.getNextNode()) || - removeInstructionsBeforeUnreachable(SI)) + // Remove guaranteed-to-transfer instructions before the marker. + if (removeInstructionsBeforeUnreachable(SI)) return &SI; + + // Remove all instructions after the marker and handle dead blocks this + // implies. + SmallVector<BasicBlock *> Worklist; + handleUnreachableFrom(SI.getNextNode(), Worklist); + handlePotentiallyDeadBlocks(Worklist); return nullptr; } @@ -1626,8 +1548,7 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) { if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info and pseudo probes. - while (BBI->isDebugOrPseudoInst() || - (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { + while (BBI->isDebugOrPseudoInst()) { if (BBI==OtherBB->begin()) return false; --BBI; @@ -1681,7 +1602,7 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) { Builder.SetInsertPoint(OtherStore); PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()), OtherBB); - MergedVal = InsertNewInstBefore(PN, DestBB->front()); + MergedVal = InsertNewInstBefore(PN, DestBB->begin()); PN->setDebugLoc(MergedLoc); } @@ -1690,7 +1611,7 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) { StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(), SI.getOrdering(), SI.getSyncScopeID()); - InsertNewInstBefore(NewSI, *BBI); + InsertNewInstBefore(NewSI, BBI); NewSI->setDebugLoc(MergedLoc); NewSI->mergeDIAssignID({&SI, OtherStore}); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 50458e2773e6..8d5866e98a8e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -258,9 +258,14 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) { // Interpret X * (-1<<C) as (-X) * (1<<C) and try to sink the negation. 
// The "* (1<<C)" thus becomes a potential shifting opportunity. - if (Value *NegOp0 = Negator::Negate(/*IsNegation*/ true, Op0, *this)) - return BinaryOperator::CreateMul( - NegOp0, ConstantExpr::getNeg(cast<Constant>(Op1)), I.getName()); + if (Value *NegOp0 = + Negator::Negate(/*IsNegation*/ true, HasNSW, Op0, *this)) { + auto *Op1C = cast<Constant>(Op1); + return replaceInstUsesWith( + I, Builder.CreateMul(NegOp0, ConstantExpr::getNeg(Op1C), "", + /* HasNUW */ false, + HasNSW && Op1C->isNotMinSignedValue())); + } // Try to convert multiply of extended operand to narrow negate and shift // for better analysis. @@ -295,9 +300,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { // Canonicalize (X|C1)*MulC -> X*MulC+C1*MulC. Value *X; Constant *C1; - if ((match(Op0, m_OneUse(m_Add(m_Value(X), m_ImmConstant(C1))))) || - (match(Op0, m_OneUse(m_Or(m_Value(X), m_ImmConstant(C1)))) && - haveNoCommonBitsSet(X, C1, DL, &AC, &I, &DT))) { + if (match(Op0, m_OneUse(m_AddLike(m_Value(X), m_ImmConstant(C1))))) { // C1*MulC simplifies to a tidier constant. Value *NewC = Builder.CreateMul(C1, MulC); auto *BOp0 = cast<BinaryOperator>(Op0); @@ -555,6 +558,180 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) { return nullptr; } +Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) { + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *X, *Y; + Constant *C; + + // Reassociate constant RHS with another constant to form constant + // expression. + if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) { + Constant *C1; + if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) { + // (C1 / X) * C --> (C * C1) / X + Constant *CC1 = + ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL); + if (CC1 && CC1->isNormalFP()) + return BinaryOperator::CreateFDivFMF(CC1, X, &I); + } + if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) { + // (X / C1) * C --> X * (C / C1) + Constant *CDivC1 = + ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL); + if (CDivC1 && CDivC1->isNormalFP()) + return BinaryOperator::CreateFMulFMF(X, CDivC1, &I); + + // If the constant was a denormal, try reassociating differently. + // (X / C1) * C --> X / (C1 / C) + Constant *C1DivC = + ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL); + if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP()) + return BinaryOperator::CreateFDivFMF(X, C1DivC, &I); + } + + // We do not need to match 'fadd C, X' and 'fsub X, C' because they are + // canonicalized to 'fadd X, C'. Distributing the multiply may allow + // further folds and (X * C) + C2 is 'fma'. 
+ if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) { + // (X + C1) * C --> (X * C) + (C * C1) + if (Constant *CC1 = + ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) { + Value *XC = Builder.CreateFMulFMF(X, C, &I); + return BinaryOperator::CreateFAddFMF(XC, CC1, &I); + } + } + if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) { + // (C1 - X) * C --> (C * C1) - (X * C) + if (Constant *CC1 = + ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) { + Value *XC = Builder.CreateFMulFMF(X, C, &I); + return BinaryOperator::CreateFSubFMF(CC1, XC, &I); + } + } + } + + Value *Z; + if (match(&I, + m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))), m_Value(Z)))) { + // Sink division: (X / Y) * Z --> (X * Z) / Y + Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I); + return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I); + } + + // sqrt(X) * sqrt(Y) -> sqrt(X * Y) + // nnan disallows the possibility of returning a number if both operands are + // negative (in that case, we should return NaN). + if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) && + match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) { + Value *XY = Builder.CreateFMulFMF(X, Y, &I); + Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I); + return replaceInstUsesWith(I, Sqrt); + } + + // The following transforms are done irrespective of the number of uses + // for the expression "1.0/sqrt(X)". + // 1) 1.0/sqrt(X) * X -> X/sqrt(X) + // 2) X * 1.0/sqrt(X) -> X/sqrt(X) + // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it + // has the necessary (reassoc) fast-math-flags. + if (I.hasNoSignedZeros() && + match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && + match(Y, m_Sqrt(m_Value(X))) && Op1 == X) + return BinaryOperator::CreateFDivFMF(X, Y, &I); + if (I.hasNoSignedZeros() && + match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && + match(Y, m_Sqrt(m_Value(X))) && Op0 == X) + return BinaryOperator::CreateFDivFMF(X, Y, &I); + + // Like the similar transform in instsimplify, this requires 'nsz' because + // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0. 
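+  // (E.g. sqrt(-0.0) * sqrt(-0.0) evaluates to +0.0, not -0.0, so the folds
+  // below are only sound when the sign of zero may be ignored.)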
+ if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 && Op0->hasNUses(2)) { + // Peek through fdiv to find squaring of square root: + // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y + if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) { + Value *XX = Builder.CreateFMulFMF(X, X, &I); + return BinaryOperator::CreateFDivFMF(XX, Y, &I); + } + // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X) + if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) { + Value *XX = Builder.CreateFMulFMF(X, X, &I); + return BinaryOperator::CreateFDivFMF(Y, XX, &I); + } + } + + // pow(X, Y) * X --> pow(X, Y+1) + // X * pow(X, Y) --> pow(X, Y+1) + if (match(&I, m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X), + m_Value(Y))), + m_Deferred(X)))) { + Value *Y1 = Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), 1.0), &I); + Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, Y1, &I); + return replaceInstUsesWith(I, Pow); + } + + if (I.isOnlyUserOfAnyOperand()) { + // pow(X, Y) * pow(X, Z) -> pow(X, Y + Z) + if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) && + match(Op1, m_Intrinsic<Intrinsic::pow>(m_Specific(X), m_Value(Z)))) { + auto *YZ = Builder.CreateFAddFMF(Y, Z, &I); + auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, YZ, &I); + return replaceInstUsesWith(I, NewPow); + } + // pow(X, Y) * pow(Z, Y) -> pow(X * Z, Y) + if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) && + match(Op1, m_Intrinsic<Intrinsic::pow>(m_Value(Z), m_Specific(Y)))) { + auto *XZ = Builder.CreateFMulFMF(X, Z, &I); + auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, XZ, Y, &I); + return replaceInstUsesWith(I, NewPow); + } + + // powi(x, y) * powi(x, z) -> powi(x, y + z) + if (match(Op0, m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y))) && + match(Op1, m_Intrinsic<Intrinsic::powi>(m_Specific(X), m_Value(Z))) && + Y->getType() == Z->getType()) { + auto *YZ = Builder.CreateAdd(Y, Z); + auto *NewPow = Builder.CreateIntrinsic( + Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I); + return replaceInstUsesWith(I, NewPow); + } + + // exp(X) * exp(Y) -> exp(X + Y) + if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) && + match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) { + Value *XY = Builder.CreateFAddFMF(X, Y, &I); + Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I); + return replaceInstUsesWith(I, Exp); + } + + // exp2(X) * exp2(Y) -> exp2(X + Y) + if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) && + match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y)))) { + Value *XY = Builder.CreateFAddFMF(X, Y, &I); + Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I); + return replaceInstUsesWith(I, Exp2); + } + } + + // (X*Y) * X => (X*X) * Y where Y != X + // The purpose is two-fold: + // 1) to form a power expression (of X). + // 2) potentially shorten the critical path: After transformation, the + // latency of the instruction Y is amortized by the expression of X*X, + // and therefore Y is in a "less critical" position compared to what it + // was before the transformation. 
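+  // (Sketch: for T = X*Y followed by T*X, emitting S = X*X and then S*Y means
+  // the X*X subexpression no longer waits on Y; illustrative, not new logic.)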
+ if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) && Op1 != Y) { + Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I); + return BinaryOperator::CreateFMulFMF(XX, Y, &I); + } + if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) && Op0 != Y) { + Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I); + return BinaryOperator::CreateFMulFMF(XX, Y, &I); + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { if (Value *V = simplifyFMulInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), @@ -602,176 +779,9 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1)) return replaceInstUsesWith(I, V); - if (I.hasAllowReassoc()) { - // Reassociate constant RHS with another constant to form constant - // expression. - if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) { - Constant *C1; - if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) { - // (C1 / X) * C --> (C * C1) / X - Constant *CC1 = - ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL); - if (CC1 && CC1->isNormalFP()) - return BinaryOperator::CreateFDivFMF(CC1, X, &I); - } - if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) { - // (X / C1) * C --> X * (C / C1) - Constant *CDivC1 = - ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL); - if (CDivC1 && CDivC1->isNormalFP()) - return BinaryOperator::CreateFMulFMF(X, CDivC1, &I); - - // If the constant was a denormal, try reassociating differently. - // (X / C1) * C --> X / (C1 / C) - Constant *C1DivC = - ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL); - if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP()) - return BinaryOperator::CreateFDivFMF(X, C1DivC, &I); - } - - // We do not need to match 'fadd C, X' and 'fsub X, C' because they are - // canonicalized to 'fadd X, C'. Distributing the multiply may allow - // further folds and (X * C) + C2 is 'fma'. - if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) { - // (X + C1) * C --> (X * C) + (C * C1) - if (Constant *CC1 = ConstantFoldBinaryOpOperands( - Instruction::FMul, C, C1, DL)) { - Value *XC = Builder.CreateFMulFMF(X, C, &I); - return BinaryOperator::CreateFAddFMF(XC, CC1, &I); - } - } - if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) { - // (C1 - X) * C --> (C * C1) - (X * C) - if (Constant *CC1 = ConstantFoldBinaryOpOperands( - Instruction::FMul, C, C1, DL)) { - Value *XC = Builder.CreateFMulFMF(X, C, &I); - return BinaryOperator::CreateFSubFMF(CC1, XC, &I); - } - } - } - - Value *Z; - if (match(&I, m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))), - m_Value(Z)))) { - // Sink division: (X / Y) * Z --> (X * Z) / Y - Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I); - return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I); - } - - // sqrt(X) * sqrt(Y) -> sqrt(X * Y) - // nnan disallows the possibility of returning a number if both operands are - // negative (in that case, we should return NaN). - if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) && - match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) { - Value *XY = Builder.CreateFMulFMF(X, Y, &I); - Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I); - return replaceInstUsesWith(I, Sqrt); - } - - // The following transforms are done irrespective of the number of uses - // for the expression "1.0/sqrt(X)". 
- // 1) 1.0/sqrt(X) * X -> X/sqrt(X) - // 2) X * 1.0/sqrt(X) -> X/sqrt(X) - // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it - // has the necessary (reassoc) fast-math-flags. - if (I.hasNoSignedZeros() && - match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && - match(Y, m_Sqrt(m_Value(X))) && Op1 == X) - return BinaryOperator::CreateFDivFMF(X, Y, &I); - if (I.hasNoSignedZeros() && - match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && - match(Y, m_Sqrt(m_Value(X))) && Op0 == X) - return BinaryOperator::CreateFDivFMF(X, Y, &I); - - // Like the similar transform in instsimplify, this requires 'nsz' because - // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0. - if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 && - Op0->hasNUses(2)) { - // Peek through fdiv to find squaring of square root: - // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y - if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) { - Value *XX = Builder.CreateFMulFMF(X, X, &I); - return BinaryOperator::CreateFDivFMF(XX, Y, &I); - } - // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X) - if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) { - Value *XX = Builder.CreateFMulFMF(X, X, &I); - return BinaryOperator::CreateFDivFMF(Y, XX, &I); - } - } - - // pow(X, Y) * X --> pow(X, Y+1) - // X * pow(X, Y) --> pow(X, Y+1) - if (match(&I, m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X), - m_Value(Y))), - m_Deferred(X)))) { - Value *Y1 = - Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), 1.0), &I); - Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, Y1, &I); - return replaceInstUsesWith(I, Pow); - } - - if (I.isOnlyUserOfAnyOperand()) { - // pow(X, Y) * pow(X, Z) -> pow(X, Y + Z) - if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) && - match(Op1, m_Intrinsic<Intrinsic::pow>(m_Specific(X), m_Value(Z)))) { - auto *YZ = Builder.CreateFAddFMF(Y, Z, &I); - auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, YZ, &I); - return replaceInstUsesWith(I, NewPow); - } - // pow(X, Y) * pow(Z, Y) -> pow(X * Z, Y) - if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) && - match(Op1, m_Intrinsic<Intrinsic::pow>(m_Value(Z), m_Specific(Y)))) { - auto *XZ = Builder.CreateFMulFMF(X, Z, &I); - auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, XZ, Y, &I); - return replaceInstUsesWith(I, NewPow); - } - - // powi(x, y) * powi(x, z) -> powi(x, y + z) - if (match(Op0, m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y))) && - match(Op1, m_Intrinsic<Intrinsic::powi>(m_Specific(X), m_Value(Z))) && - Y->getType() == Z->getType()) { - auto *YZ = Builder.CreateAdd(Y, Z); - auto *NewPow = Builder.CreateIntrinsic( - Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I); - return replaceInstUsesWith(I, NewPow); - } - - // exp(X) * exp(Y) -> exp(X + Y) - if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) && - match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) { - Value *XY = Builder.CreateFAddFMF(X, Y, &I); - Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I); - return replaceInstUsesWith(I, Exp); - } - - // exp2(X) * exp2(Y) -> exp2(X + Y) - if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) && - match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y)))) { - Value *XY = Builder.CreateFAddFMF(X, Y, &I); - Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I); - return replaceInstUsesWith(I, Exp2); - } - } - - // (X*Y) * X => (X*X) * Y where Y != X - // The purpose is two-fold: - // 1) to form a power 
expression (of X). - // 2) potentially shorten the critical path: After transformation, the - // latency of the instruction Y is amortized by the expression of X*X, - // and therefore Y is in a "less critical" position compared to what it - // was before the transformation. - if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) && - Op1 != Y) { - Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I); - return BinaryOperator::CreateFMulFMF(XX, Y, &I); - } - if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) && - Op0 != Y) { - Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I); - return BinaryOperator::CreateFMulFMF(XX, Y, &I); - } - } + if (I.hasAllowReassoc()) + if (Instruction *FoldedMul = foldFMulReassoc(I)) + return FoldedMul; // log2(X * 0.5) * Y = log2(X) * Y - Y if (I.isFast()) { @@ -802,7 +812,7 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { I.hasNoSignedZeros() && match(Start, m_Zero())) return replaceInstUsesWith(I, Start); - // minimun(X, Y) * maximum(X, Y) => X * Y. + // minimum(X, Y) * maximum(X, Y) => X * Y. if (match(&I, m_c_FMul(m_Intrinsic<Intrinsic::maximum>(m_Value(X), m_Value(Y)), m_c_Intrinsic<Intrinsic::minimum>(m_Deferred(X), @@ -918,8 +928,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient, return Remainder.isMinValue(); } -static Instruction *foldIDivShl(BinaryOperator &I, - InstCombiner::BuilderTy &Builder) { +static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) { assert((I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::UDiv) && "Expected integer divide"); @@ -928,7 +937,6 @@ static Instruction *foldIDivShl(BinaryOperator &I, Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = I.getType(); - Instruction *Ret = nullptr; Value *X, *Y, *Z; // With appropriate no-wrap constraints, remove a common factor in the @@ -943,12 +951,12 @@ static Instruction *foldIDivShl(BinaryOperator &I, // (X * Y) u/ (X << Z) --> Y u>> Z if (!IsSigned && HasNUW) - Ret = BinaryOperator::CreateLShr(Y, Z); + return Builder.CreateLShr(Y, Z, "", I.isExact()); // (X * Y) s/ (X << Z) --> Y s/ (1 << Z) if (IsSigned && HasNSW && (Op0->hasOneUse() || Op1->hasOneUse())) { Value *Shl = Builder.CreateShl(ConstantInt::get(Ty, 1), Z); - Ret = BinaryOperator::CreateSDiv(Y, Shl); + return Builder.CreateSDiv(Y, Shl, "", I.isExact()); } } @@ -966,20 +974,38 @@ static Instruction *foldIDivShl(BinaryOperator &I, ((Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) || (Shl0->hasNoUnsignedWrap() && Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap()))) - Ret = BinaryOperator::CreateUDiv(X, Y); + return Builder.CreateUDiv(X, Y, "", I.isExact()); // For signed div, we need 'nsw' on both shifts + 'nuw' on the divisor. // (X << Z) / (Y << Z) --> X / Y if (IsSigned && Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap() && Shl1->hasNoUnsignedWrap()) - Ret = BinaryOperator::CreateSDiv(X, Y); + return Builder.CreateSDiv(X, Y, "", I.isExact()); } - if (!Ret) - return nullptr; + // If X << Y and X << Z does not overflow, then: + // (X << Y) / (X << Z) -> (1 << Y) / (1 << Z) -> 1 << Y >> Z + if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) && + match(Op1, m_Shl(m_Specific(X), m_Value(Z)))) { + auto *Shl0 = cast<OverflowingBinaryOperator>(Op0); + auto *Shl1 = cast<OverflowingBinaryOperator>(Op1); - Ret->setIsExact(I.isExact()); - return Ret; + if (IsSigned ? 
(Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap()) + : (Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap())) { + Constant *One = ConstantInt::get(X->getType(), 1); + // Only preserve the nsw flag if dividend has nsw + // or divisor has nsw and operator is sdiv. + Value *Dividend = Builder.CreateShl( + One, Y, "shl.dividend", + /*HasNUW*/ true, + /*HasNSW*/ + IsSigned ? (Shl0->hasNoUnsignedWrap() || Shl1->hasNoUnsignedWrap()) + : Shl0->hasNoSignedWrap()); + return Builder.CreateLShr(Dividend, Z, "", I.isExact()); + } + } + + return nullptr; } /// This function implements the transforms common to both integer division @@ -1156,8 +1182,8 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) { return NewDiv; } - if (Instruction *R = foldIDivShl(I, Builder)) - return R; + if (Value *R = foldIDivShl(I, Builder)) + return replaceInstUsesWith(I, R); // With the appropriate no-wrap constraint, remove a multiply by the divisor // after peeking through another divide: @@ -1263,7 +1289,7 @@ static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth, /// If we have zero-extended operands of an unsigned div or rem, we may be able /// to narrow the operation (sink the zext below the math). static Instruction *narrowUDivURem(BinaryOperator &I, - InstCombiner::BuilderTy &Builder) { + InstCombinerImpl &IC) { Instruction::BinaryOps Opcode = I.getOpcode(); Value *N = I.getOperand(0); Value *D = I.getOperand(1); @@ -1273,7 +1299,7 @@ static Instruction *narrowUDivURem(BinaryOperator &I, X->getType() == Y->getType() && (N->hasOneUse() || D->hasOneUse())) { // udiv (zext X), (zext Y) --> zext (udiv X, Y) // urem (zext X), (zext Y) --> zext (urem X, Y) - Value *NarrowOp = Builder.CreateBinOp(Opcode, X, Y); + Value *NarrowOp = IC.Builder.CreateBinOp(Opcode, X, Y); return new ZExtInst(NarrowOp, Ty); } @@ -1281,24 +1307,24 @@ static Instruction *narrowUDivURem(BinaryOperator &I, if (isa<Instruction>(N) && match(N, m_OneUse(m_ZExt(m_Value(X)))) && match(D, m_Constant(C))) { // If the constant is the same in the smaller type, use the narrow version. - Constant *TruncC = ConstantExpr::getTrunc(C, X->getType()); - if (ConstantExpr::getZExt(TruncC, Ty) != C) + Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType()); + if (!TruncC) return nullptr; // udiv (zext X), C --> zext (udiv X, C') // urem (zext X), C --> zext (urem X, C') - return new ZExtInst(Builder.CreateBinOp(Opcode, X, TruncC), Ty); + return new ZExtInst(IC.Builder.CreateBinOp(Opcode, X, TruncC), Ty); } if (isa<Instruction>(D) && match(D, m_OneUse(m_ZExt(m_Value(X)))) && match(N, m_Constant(C))) { // If the constant is the same in the smaller type, use the narrow version. 
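The "same constant in the smaller type" test above is a trunc-then-zext round trip; a minimal model with i64 to i8 standing in for the wide and narrow types (the helper name here is made up for this sketch):

#include <cassert>
#include <cstdint>

// A constant can be narrowed for the udiv/urem fold only if zero-extending
// the truncated value reproduces it exactly.
bool losslessUnsignedTrunc(uint64_t C) {
  uint8_t Narrow = static_cast<uint8_t>(C);  // trunc i64 -> i8
  return static_cast<uint64_t>(Narrow) == C; // zext i8 -> i64 and compare
}

int main() {
  assert(losslessUnsignedTrunc(200));  // fits: the fold may proceed
  assert(!losslessUnsignedTrunc(300)); // lossy: bail out, as the code does
  return 0;
}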
-    Constant *TruncC = ConstantExpr::getTrunc(C, X->getType());
-    if (ConstantExpr::getZExt(TruncC, Ty) != C)
+    Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType());
+    if (!TruncC)
       return nullptr;
 
     // udiv C, (zext X) --> zext (udiv C', X)
     // urem C, (zext X) --> zext (urem C', X)
-    return new ZExtInst(Builder.CreateBinOp(Opcode, TruncC, X), Ty);
+    return new ZExtInst(IC.Builder.CreateBinOp(Opcode, TruncC, X), Ty);
   }
 
   return nullptr;
@@ -1346,7 +1372,7 @@ Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) {
     return CastInst::CreateZExtOrBitCast(Cmp, Ty);
   }
 
-  if (Instruction *NarrowDiv = narrowUDivURem(I, Builder))
+  if (Instruction *NarrowDiv = narrowUDivURem(I, *this))
     return NarrowDiv;
 
   // If the udiv operands are non-overflowing multiplies with a common operand,
@@ -1405,7 +1431,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
   // sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined)
   if (match(Op1, m_AllOnes()) ||
       (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
-    return BinaryOperator::CreateNeg(Op0);
+    return BinaryOperator::CreateNSWNeg(Op0);
 
   // X / INT_MIN --> X == INT_MIN
   if (match(Op1, m_SignMask()))
@@ -1428,7 +1454,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
       Constant *NegPow2C = ConstantExpr::getNeg(cast<Constant>(Op1));
       Constant *C = ConstantExpr::getExactLogBase2(NegPow2C);
       Value *Ashr = Builder.CreateAShr(Op0, C, I.getName() + ".neg", true);
-      return BinaryOperator::CreateNeg(Ashr);
+      return BinaryOperator::CreateNSWNeg(Ashr);
     }
   }
 
@@ -1490,7 +1516,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
 
   if (KnownDividend.isNonNegative()) {
     // If both operands are unsigned, turn this into a udiv.
-    if (isKnownNonNegative(Op1, DL, 0, &AC, &I, &DT)) {
+    if (isKnownNonNegative(Op1, SQ.getWithInstruction(&I))) {
       auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
       BO->setIsExact(I.isExact());
       return BO;
@@ -1516,6 +1542,13 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
     }
   }
 
+  // -X / X --> X == INT_MIN ? 1 : -1
+  if (isKnownNegation(Op0, Op1)) {
+    APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
+    Value *Cond = Builder.CreateICmpEQ(Op0, ConstantInt::get(Ty, MinVal));
+    return SelectInst::Create(Cond, ConstantInt::get(Ty, 1),
+                              ConstantInt::getAllOnesValue(Ty));
+  }
   return nullptr;
 }
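The select produced by the "-X / X" fold above exists only for the INT_MIN corner, where wrapping negation maps the value to itself and the quotient becomes +1. A small reference check (standalone, not part of the patch):

#include <cassert>
#include <cstdint>

// Reference semantics of the fold: -X divided by X is -1 for every value
// except INT_MIN, whose (wrapping) negation is itself, giving +1.
int32_t negOverSelf(int32_t X) { return X == INT32_MIN ? 1 : -1; }

int main() {
  assert(negOverSelf(7) == -7 / 7);
  assert(negOverSelf(-7) == 7 / -7);
  // Negating INT32_MIN is UB in C++, but `sub 0, INT_MIN` wraps in LLVM IR,
  // and INT_MIN / INT_MIN == 1, matching the select's true arm.
  assert(negOverSelf(INT32_MIN) == 1);
  return 0;
}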
@@ -1759,6 +1792,21 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
     return replaceInstUsesWith(I, Pow);
   }
 
+  // powi(X, Y) / X --> powi(X, Y-1)
+  // This is legal when (Y - 1) can't wrap around, in which case reassoc and
+  // nnan are required.
+  // TODO: Multi-use cases may also be better off creating powi(x, y-1).
+  if (I.hasAllowReassoc() && I.hasNoNaNs() &&
+      match(Op0, m_OneUse(m_Intrinsic<Intrinsic::powi>(m_Specific(Op1),
+                                                       m_Value(Y)))) &&
+      willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+    Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+    Value *Y1 = Builder.CreateAdd(Y, NegOne);
+    Type *Types[] = {Op1->getType(), Y1->getType()};
+    Value *Pow = Builder.CreateIntrinsic(Intrinsic::powi, Types, {Op1, Y1}, &I);
+    return replaceInstUsesWith(I, Pow);
+  }
+
   return nullptr;
 }
 
@@ -1936,7 +1984,7 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
   if (Instruction *common = commonIRemTransforms(I))
     return common;
 
-  if (Instruction *NarrowRem = narrowUDivURem(I, Builder))
+  if (Instruction *NarrowRem = narrowUDivURem(I, *this))
     return NarrowRem;
 
   // X urem Y -> X and Y-1, where Y is a power of 2,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index e24abc48424d..513b185c83a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -20,7 +20,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constant.h"
@@ -98,14 +97,13 @@ static cl::opt<unsigned>
                   cl::desc("What is the maximal lookup depth when trying to "
                            "check for viability of negation sinking."));
 
-Negator::Negator(LLVMContext &C, const DataLayout &DL_, AssumptionCache &AC_,
-                 const DominatorTree &DT_, bool IsTrulyNegation_)
-    : Builder(C, TargetFolder(DL_),
+Negator::Negator(LLVMContext &C, const DataLayout &DL, bool IsTrulyNegation_)
+    : Builder(C, TargetFolder(DL),
               IRBuilderCallbackInserter([&](Instruction *I) {
                 ++NegatorNumInstructionsCreatedTotal;
                 NewInstructions.push_back(I);
               })),
-      DL(DL_), AC(AC_), DT(DT_), IsTrulyNegation(IsTrulyNegation_) {}
+      IsTrulyNegation(IsTrulyNegation_) {}
 
 #if LLVM_ENABLE_STATS
 Negator::~Negator() {
@@ -128,7 +126,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
 
 // FIXME: can this be reworked into a worklist-based algorithm while preserving
 // the depth-first, early bailout traversal?
-[[nodiscard]] Value *Negator::visitImpl(Value *V, unsigned Depth) {
+[[nodiscard]] Value *Negator::visitImpl(Value *V, bool IsNSW, unsigned Depth) {
   // -(undef) -> undef.
   if (match(V, m_Undef()))
     return V;
@@ -237,7 +235,8 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
     // However, only do this either if the old `sub` doesn't stick around, or
     // it was subtracting from a constant. Otherwise, this isn't profitable.
     return Builder.CreateSub(I->getOperand(1), I->getOperand(0),
-                             I->getName() + ".neg");
+                             I->getName() + ".neg", /* HasNUW */ false,
+                             IsNSW && I->hasNoSignedWrap());
   }
 
   // Some other cases, while they still don't require recursion,
@@ -302,7 +301,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
   switch (I->getOpcode()) {
   case Instruction::Freeze: {
     // `freeze` is negatible if its operand is negatible.
-    Value *NegOp = negate(I->getOperand(0), Depth + 1);
+    Value *NegOp = negate(I->getOperand(0), IsNSW, Depth + 1);
     if (!NegOp) // Early return.
return nullptr; return Builder.CreateFreeze(NegOp, I->getName() + ".neg"); @@ -313,7 +312,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { SmallVector<Value *, 4> NegatedIncomingValues(PHI->getNumOperands()); for (auto I : zip(PHI->incoming_values(), NegatedIncomingValues)) { if (!(std::get<1>(I) = - negate(std::get<0>(I), Depth + 1))) // Early return. + negate(std::get<0>(I), IsNSW, Depth + 1))) // Early return. return nullptr; } // All incoming values are indeed negatible. Create negated PHI node. @@ -336,10 +335,10 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { return NewSelect; } // `select` is negatible if both hands of `select` are negatible. - Value *NegOp1 = negate(I->getOperand(1), Depth + 1); + Value *NegOp1 = negate(I->getOperand(1), IsNSW, Depth + 1); if (!NegOp1) // Early return. return nullptr; - Value *NegOp2 = negate(I->getOperand(2), Depth + 1); + Value *NegOp2 = negate(I->getOperand(2), IsNSW, Depth + 1); if (!NegOp2) return nullptr; // Do preserve the metadata! @@ -349,10 +348,10 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { case Instruction::ShuffleVector: { // `shufflevector` is negatible if both operands are negatible. auto *Shuf = cast<ShuffleVectorInst>(I); - Value *NegOp0 = negate(I->getOperand(0), Depth + 1); + Value *NegOp0 = negate(I->getOperand(0), IsNSW, Depth + 1); if (!NegOp0) // Early return. return nullptr; - Value *NegOp1 = negate(I->getOperand(1), Depth + 1); + Value *NegOp1 = negate(I->getOperand(1), IsNSW, Depth + 1); if (!NegOp1) return nullptr; return Builder.CreateShuffleVector(NegOp0, NegOp1, Shuf->getShuffleMask(), @@ -361,7 +360,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { case Instruction::ExtractElement: { // `extractelement` is negatible if source operand is negatible. auto *EEI = cast<ExtractElementInst>(I); - Value *NegVector = negate(EEI->getVectorOperand(), Depth + 1); + Value *NegVector = negate(EEI->getVectorOperand(), IsNSW, Depth + 1); if (!NegVector) // Early return. return nullptr; return Builder.CreateExtractElement(NegVector, EEI->getIndexOperand(), @@ -371,10 +370,10 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { // `insertelement` is negatible if both the source vector and // element-to-be-inserted are negatible. auto *IEI = cast<InsertElementInst>(I); - Value *NegVector = negate(IEI->getOperand(0), Depth + 1); + Value *NegVector = negate(IEI->getOperand(0), IsNSW, Depth + 1); if (!NegVector) // Early return. return nullptr; - Value *NegNewElt = negate(IEI->getOperand(1), Depth + 1); + Value *NegNewElt = negate(IEI->getOperand(1), IsNSW, Depth + 1); if (!NegNewElt) // Early return. return nullptr; return Builder.CreateInsertElement(NegVector, NegNewElt, IEI->getOperand(2), @@ -382,15 +381,17 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { } case Instruction::Trunc: { // `trunc` is negatible if its operand is negatible. - Value *NegOp = negate(I->getOperand(0), Depth + 1); + Value *NegOp = negate(I->getOperand(0), /* IsNSW */ false, Depth + 1); if (!NegOp) // Early return. return nullptr; return Builder.CreateTrunc(NegOp, I->getType(), I->getName() + ".neg"); } case Instruction::Shl: { // `shl` is negatible if the first operand is negatible. 
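For the `shl` case that follows, sinking the negation is always value-correct because negation distributes over a left shift in two's complement; only the nsw flag needs the extra bookkeeping the code performs. A quick unsigned (UB-free) check, standalone and illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  // -(x << c) == (-x) << c  (mod 2^32)
  for (uint32_t x = 0; x < 1024; ++x)
    for (unsigned c = 0; c < 32; ++c)
      assert(static_cast<uint32_t>(-(x << c)) == ((-x) << c));
  return 0;
}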
- if (Value *NegOp0 = negate(I->getOperand(0), Depth + 1)) - return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg"); + IsNSW &= I->hasNoSignedWrap(); + if (Value *NegOp0 = negate(I->getOperand(0), IsNSW, Depth + 1)) + return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg", + /* HasNUW */ false, IsNSW); // Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<<C`. auto *Op1C = dyn_cast<Constant>(I->getOperand(1)); if (!Op1C || !IsTrulyNegation) @@ -398,11 +399,10 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { return Builder.CreateMul( I->getOperand(0), ConstantExpr::getShl(Constant::getAllOnesValue(Op1C->getType()), Op1C), - I->getName() + ".neg"); + I->getName() + ".neg", /* HasNUW */ false, IsNSW); } case Instruction::Or: { - if (!haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL, &AC, I, - &DT)) + if (!cast<PossiblyDisjointInst>(I)->isDisjoint()) return nullptr; // Don't know how to handle `or` in general. std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I); // `or`/`add` are interchangeable when operands have no common bits set. @@ -417,7 +417,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { SmallVector<Value *, 2> NegatedOps, NonNegatedOps; for (Value *Op : I->operands()) { // Can we sink the negation into this operand? - if (Value *NegOp = negate(Op, Depth + 1)) { + if (Value *NegOp = negate(Op, /* IsNSW */ false, Depth + 1)) { NegatedOps.emplace_back(NegOp); // Successfully negated operand! continue; } @@ -446,9 +446,11 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { // `xor` is negatible if one of its operands is invertible. // FIXME: InstCombineInverter? But how to connect Inverter and Negator? if (auto *C = dyn_cast<Constant>(Ops[1])) { - Value *Xor = Builder.CreateXor(Ops[0], ConstantExpr::getNot(C)); - return Builder.CreateAdd(Xor, ConstantInt::get(Xor->getType(), 1), - I->getName() + ".neg"); + if (IsTrulyNegation) { + Value *Xor = Builder.CreateXor(Ops[0], ConstantExpr::getNot(C)); + return Builder.CreateAdd(Xor, ConstantInt::get(Xor->getType(), 1), + I->getName() + ".neg"); + } } return nullptr; } @@ -458,16 +460,17 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { Value *NegatedOp, *OtherOp; // First try the second operand, in case it's a constant it will be best to // just invert it instead of sinking the `neg` deeper. - if (Value *NegOp1 = negate(Ops[1], Depth + 1)) { + if (Value *NegOp1 = negate(Ops[1], /* IsNSW */ false, Depth + 1)) { NegatedOp = NegOp1; OtherOp = Ops[0]; - } else if (Value *NegOp0 = negate(Ops[0], Depth + 1)) { + } else if (Value *NegOp0 = negate(Ops[0], /* IsNSW */ false, Depth + 1)) { NegatedOp = NegOp0; OtherOp = Ops[1]; } else // Can't negate either of them. return nullptr; - return Builder.CreateMul(NegatedOp, OtherOp, I->getName() + ".neg"); + return Builder.CreateMul(NegatedOp, OtherOp, I->getName() + ".neg", + /* HasNUW */ false, IsNSW && I->hasNoSignedWrap()); } default: return nullptr; // Don't know, likely not negatible for free. @@ -476,7 +479,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { llvm_unreachable("Can't get here. 
We always return from switch."); } -[[nodiscard]] Value *Negator::negate(Value *V, unsigned Depth) { +[[nodiscard]] Value *Negator::negate(Value *V, bool IsNSW, unsigned Depth) { NegatorMaxDepthVisited.updateMax(Depth); ++NegatorNumValuesVisited; @@ -506,15 +509,16 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { #endif // No luck. Try negating it for real. - Value *NegatedV = visitImpl(V, Depth); + Value *NegatedV = visitImpl(V, IsNSW, Depth); // And cache the (real) result for the future. NegationsCache[V] = NegatedV; return NegatedV; } -[[nodiscard]] std::optional<Negator::Result> Negator::run(Value *Root) { - Value *Negated = negate(Root, /*Depth=*/0); +[[nodiscard]] std::optional<Negator::Result> Negator::run(Value *Root, + bool IsNSW) { + Value *Negated = negate(Root, IsNSW, /*Depth=*/0); if (!Negated) { // We must cleanup newly-inserted instructions, to avoid any potential // endless combine looping. @@ -525,7 +529,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { return std::make_pair(ArrayRef<Instruction *>(NewInstructions), Negated); } -[[nodiscard]] Value *Negator::Negate(bool LHSIsZero, Value *Root, +[[nodiscard]] Value *Negator::Negate(bool LHSIsZero, bool IsNSW, Value *Root, InstCombinerImpl &IC) { ++NegatorTotalNegationsAttempted; LLVM_DEBUG(dbgs() << "Negator: attempting to sink negation into " << *Root @@ -534,9 +538,8 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { if (!NegatorEnabled || !DebugCounter::shouldExecute(NegatorCounter)) return nullptr; - Negator N(Root->getContext(), IC.getDataLayout(), IC.getAssumptionCache(), - IC.getDominatorTree(), LHSIsZero); - std::optional<Result> Res = N.run(Root); + Negator N(Root->getContext(), IC.getDataLayout(), LHSIsZero); + std::optional<Result> Res = N.run(Root, IsNSW); if (!Res) { // Negation failed. LLVM_DEBUG(dbgs() << "Negator: failed to sink negation into " << *Root << "\n"); diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 2f6aa85062a5..20b34c1379d5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -248,7 +248,7 @@ bool InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { PHINode *NewPtrPHI = PHINode::Create( IntToPtr->getType(), PN.getNumIncomingValues(), PN.getName() + ".ptr"); - InsertNewInstBefore(NewPtrPHI, PN); + InsertNewInstBefore(NewPtrPHI, PN.getIterator()); SmallDenseMap<Value *, Instruction *> Casts; for (auto Incoming : zip(PN.blocks(), AvailablePtrVals)) { auto *IncomingBB = std::get<0>(Incoming); @@ -285,10 +285,10 @@ bool InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { if (isa<PHINode>(IncomingI)) InsertPos = BB->getFirstInsertionPt(); assert(InsertPos != BB->end() && "should have checked above"); - InsertNewInstBefore(CI, *InsertPos); + InsertNewInstBefore(CI, InsertPos); } else { auto *InsertBB = &IncomingBB->getParent()->getEntryBlock(); - InsertNewInstBefore(CI, *InsertBB->getFirstInsertionPt()); + InsertNewInstBefore(CI, InsertBB->getFirstInsertionPt()); } } NewPtrPHI->addIncoming(CI, IncomingBB); @@ -353,7 +353,7 @@ InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) { NewOperand->addIncoming( cast<InsertValueInst>(std::get<1>(Incoming))->getOperand(OpIdx), std::get<0>(Incoming)); - InsertNewInstBefore(NewOperand, PN); + InsertNewInstBefore(NewOperand, PN.getIterator()); } // And finally, create `insertvalue` over the newly-formed PHI nodes. 
@@ -391,7 +391,7 @@ InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) { NewAggregateOperand->addIncoming( cast<ExtractValueInst>(std::get<1>(Incoming))->getAggregateOperand(), std::get<0>(Incoming)); - InsertNewInstBefore(NewAggregateOperand, PN); + InsertNewInstBefore(NewAggregateOperand, PN.getIterator()); // And finally, create `extractvalue` over the newly-formed PHI nodes. auto *NewEVI = ExtractValueInst::Create(NewAggregateOperand, @@ -450,7 +450,7 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), FirstInst->getOperand(0)->getName() + ".pn"); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); - InsertNewInstBefore(NewLHS, PN); + InsertNewInstBefore(NewLHS, PN.getIterator()); LHSVal = NewLHS; } @@ -458,7 +458,7 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); - InsertNewInstBefore(NewRHS, PN); + InsertNewInstBefore(NewRHS, PN.getIterator()); RHSVal = NewRHS; } @@ -581,7 +581,7 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { Value *FirstOp = FirstInst->getOperand(I); PHINode *NewPN = PHINode::Create(FirstOp->getType(), E, FirstOp->getName() + ".pn"); - InsertNewInstBefore(NewPN, PN); + InsertNewInstBefore(NewPN, PN.getIterator()); NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[I] = NewPN; @@ -769,7 +769,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { NewLI->setOperand(0, InVal); delete NewPN; } else { - InsertNewInstBefore(NewPN, PN); + InsertNewInstBefore(NewPN, PN.getIterator()); } // If this was a volatile load that we are merging, make sure to loop through @@ -825,8 +825,8 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) { NumZexts++; } else if (auto *C = dyn_cast<Constant>(V)) { // Make sure that constants can fit in the new type. - Constant *Trunc = ConstantExpr::getTrunc(C, NarrowType); - if (ConstantExpr::getZExt(Trunc, C->getType()) != C) + Constant *Trunc = getLosslessUnsignedTrunc(C, NarrowType); + if (!Trunc) return nullptr; NewIncoming.push_back(Trunc); NumConsts++; @@ -853,7 +853,7 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) { for (unsigned I = 0; I != NumIncomingValues; ++I) NewPhi->addIncoming(NewIncoming[I], Phi.getIncomingBlock(I)); - InsertNewInstBefore(NewPhi, Phi); + InsertNewInstBefore(NewPhi, Phi.getIterator()); return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType()); } @@ -943,7 +943,7 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { PhiVal = InVal; delete NewPN; } else { - InsertNewInstBefore(NewPN, PN); + InsertNewInstBefore(NewPN, PN.getIterator()); PhiVal = NewPN; } @@ -996,8 +996,8 @@ static bool isDeadPHICycle(PHINode *PN, /// Return true if this phi node is always equal to NonPhiInVal. /// This happens with mutually cyclic phi nodes like: /// z = some value; x = phi (y, z); y = phi (x, z) -static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, - SmallPtrSetImpl<PHINode*> &ValueEqualPHIs) { +static bool PHIsEqualValue(PHINode *PN, Value *&NonPhiInVal, + SmallPtrSetImpl<PHINode *> &ValueEqualPHIs) { // See if we already saw this PHI node. if (!ValueEqualPHIs.insert(PN).second) return true; @@ -1010,8 +1010,11 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, // the value. 
   for (Value *Op : PN->incoming_values()) {
     if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
-      if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
-        return false;
+      if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) {
+        if (NonPhiInVal)
+          return false;
+        NonPhiInVal = OpPN;
+      }
     } else if (Op != NonPhiInVal)
       return false;
   }
 
@@ -1368,7 +1371,7 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
   // sinking.
   auto InsertPt = BB->getFirstInsertionPt();
   if (InsertPt != BB->end()) {
-    Self.Builder.SetInsertPoint(&*InsertPt);
+    Self.Builder.SetInsertPoint(&*BB, InsertPt);
     return Self.Builder.CreateNot(Cond);
   }
 
@@ -1437,22 +1440,45 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
   // are induction variable analysis (sometimes) and ADCE, which is only run
   // late.
   if (PHIUser->hasOneUse() &&
-      (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+      (isa<BinaryOperator>(PHIUser) || isa<UnaryOperator>(PHIUser) ||
+       isa<GetElementPtrInst>(PHIUser)) &&
       PHIUser->user_back() == &PN) {
     return replaceInstUsesWith(PN, PoisonValue::get(PN.getType()));
   }
-  // When a PHI is used only to be compared with zero, it is safe to replace
-  // an incoming value proved as known nonzero with any non-zero constant.
-  // For example, in the code below, the incoming value %v can be replaced
-  // with any non-zero constant based on the fact that the PHI is only used to
-  // be compared with zero and %v is a known non-zero value:
-  // %v = select %cond, 1, 2
-  // %p = phi [%v, BB] ...
-  // icmp eq, %p, 0
-  auto *CmpInst = dyn_cast<ICmpInst>(PHIUser);
-  // FIXME: To be simple, handle only integer type for now.
-  if (CmpInst && isa<IntegerType>(PN.getType()) && CmpInst->isEquality() &&
-      match(CmpInst->getOperand(1), m_Zero())) {
+  }
+
+  // When a PHI is used only to be compared with zero, it is safe to replace
+  // an incoming value proved as known nonzero with any non-zero constant.
+  // For example, in the code below, the incoming value %v can be replaced
+  // with any non-zero constant based on the fact that the PHI is only used to
+  // be compared with zero and %v is a known non-zero value:
+  // %v = select %cond, 1, 2
+  // %p = phi [%v, BB] ...
+  // icmp eq, %p, 0
+  // FIXME: To be simple, handle only integer type for now.
+  // This handles a small number of uses to keep the complexity down, and an
+  // icmp(or(phi)) can equally be replaced with any non-zero constant as the
+  // "or" will only add bits.
+  if (!PN.hasNUsesOrMore(3)) {
+    SmallVector<Instruction *> DropPoisonFlags;
+    bool AllUsesOfPhiEndsInCmp = all_of(PN.users(), [&](User *U) {
+      auto *CmpInst = dyn_cast<ICmpInst>(U);
+      if (!CmpInst) {
+        // This is always correct, as OR only adds bits and we are checking
+        // against 0.
+        if (U->hasOneUse() && match(U, m_c_Or(m_Specific(&PN), m_Value()))) {
+          DropPoisonFlags.push_back(cast<Instruction>(U));
+          CmpInst = dyn_cast<ICmpInst>(U->user_back());
+        }
+      }
+      if (!CmpInst || !isa<IntegerType>(PN.getType()) ||
+          !CmpInst->isEquality() || !match(CmpInst->getOperand(1), m_Zero())) {
+        return false;
+      }
+      return true;
+    });
+    // All uses of the PHI result in a compare with zero.
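Why the `or` look-through above is sound: an equality compare against zero observes nothing but zero-ness, and or-ing bits onto a non-zero value keeps it non-zero. Checked exhaustively on i8 in this standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t p = 1; p < 256; ++p)   // any non-zero incoming value
    for (uint32_t x = 0; x < 256; ++x) // any value or'd onto the phi
      assert((p | x) != 0);            // the icmp-with-0 result is unchanged
  return 0;
}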
+ if (AllUsesOfPhiEndsInCmp) { ConstantInt *NonZeroConst = nullptr; bool MadeChange = false; for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) { @@ -1461,9 +1487,11 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) { if (!NonZeroConst) NonZeroConst = getAnyNonZeroConstInt(PN); - if (NonZeroConst != VA) { replaceOperand(PN, I, NonZeroConst); + // The "disjoint" flag may no longer hold after the transform. + for (Instruction *I : DropPoisonFlags) + I->dropPoisonGeneratingFlags(); MadeChange = true; } } @@ -1478,7 +1506,9 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { // z = some value; x = phi (y, z); y = phi (x, z) // where the phi nodes don't necessarily need to be in the same block. Do a // quick check to see if the PHI node only contains a single non-phi value, if - // so, scan to see if the phi cycle is actually equal to that value. + // so, scan to see if the phi cycle is actually equal to that value. If the + // phi has no non-phi values then allow the "NonPhiInVal" to be set later if + // one of the phis itself does not have a single input. { unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues(); // Scan for the first non-phi operand. @@ -1486,25 +1516,25 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { isa<PHINode>(PN.getIncomingValue(InValNo))) ++InValNo; - if (InValNo != NumIncomingVals) { - Value *NonPhiInVal = PN.getIncomingValue(InValNo); + Value *NonPhiInVal = + InValNo != NumIncomingVals ? PN.getIncomingValue(InValNo) : nullptr; - // Scan the rest of the operands to see if there are any conflicts, if so - // there is no need to recursively scan other phis. + // Scan the rest of the operands to see if there are any conflicts, if so + // there is no need to recursively scan other phis. + if (NonPhiInVal) for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { Value *OpVal = PN.getIncomingValue(InValNo); if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) break; } - // If we scanned over all operands, then we have one unique value plus - // phi values. Scan PHI nodes to see if they all merge in each other or - // the value. - if (InValNo == NumIncomingVals) { - SmallPtrSet<PHINode*, 16> ValueEqualPHIs; - if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) - return replaceInstUsesWith(PN, NonPhiInVal); - } + // If we scanned over all operands, then we have one unique value plus + // phi values. Scan PHI nodes to see if they all merge in each other or + // the value. + if (InValNo == NumIncomingVals) { + SmallPtrSet<PHINode *, 16> ValueEqualPHIs; + if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) + return replaceInstUsesWith(PN, NonPhiInVal); } } @@ -1512,11 +1542,12 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { // the blocks in the same order. This will help identical PHIs be eliminated // by other passes. Other passes shouldn't depend on this for correctness // however. 
- PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin()); - if (&PN != FirstPN) - for (unsigned I = 0, E = FirstPN->getNumIncomingValues(); I != E; ++I) { + auto Res = PredOrder.try_emplace(PN.getParent()); + if (!Res.second) { + const auto &Preds = Res.first->second; + for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) { BasicBlock *BBA = PN.getIncomingBlock(I); - BasicBlock *BBB = FirstPN->getIncomingBlock(I); + BasicBlock *BBB = Preds[I]; if (BBA != BBB) { Value *VA = PN.getIncomingValue(I); unsigned J = PN.getBasicBlockIndex(BBB); @@ -1531,6 +1562,10 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { // this in this case. } } + } else { + // Remember the block order of the first encountered phi node. + append_range(Res.first->second, PN.blocks()); + } // Is there an identical PHI node in this basic block? for (PHINode &IdenticalPN : PN.getParent()->phis()) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 661c50062223..2dda46986f0f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -689,34 +689,40 @@ static Value *foldSelectICmpLshrAshr(const ICmpInst *IC, Value *TrueVal, } /// We want to turn: -/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) +/// (select (icmp eq (and X, C1), 0), Y, (BinOp Y, C2)) /// into: -/// (or (shl (and X, C1), C3), Y) +/// IF C2 u>= C1 +/// (BinOp Y, (shl (and X, C1), C3)) +/// ELSE +/// (BinOp Y, (lshr (and X, C1), C3)) /// iff: +/// 0 on the RHS is the identity value (i.e add, xor, shl, etc...) /// C1 and C2 are both powers of 2 /// where: -/// C3 = Log(C2) - Log(C1) +/// IF C2 u>= C1 +/// C3 = Log(C2) - Log(C1) +/// ELSE +/// C3 = Log(C1) - Log(C2) /// /// This transform handles cases where: /// 1. The icmp predicate is inverted /// 2. The select operands are reversed /// 3. The magnitude of C2 and C1 are flipped -static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal, +static Value *foldSelectICmpAndBinOp(const ICmpInst *IC, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy &Builder) { // Only handle integer compares. Also, if this is a vector select, we need a // vector compare. if (!TrueVal->getType()->isIntOrIntVectorTy() || - TrueVal->getType()->isVectorTy() != IC->getType()->isVectorTy()) + TrueVal->getType()->isVectorTy() != IC->getType()->isVectorTy()) return nullptr; Value *CmpLHS = IC->getOperand(0); Value *CmpRHS = IC->getOperand(1); - Value *V; unsigned C1Log; - bool IsEqualZero; bool NeedAnd = false; + CmpInst::Predicate Pred = IC->getPredicate(); if (IC->isEquality()) { if (!match(CmpRHS, m_Zero())) return nullptr; @@ -725,49 +731,49 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal, if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1)))) return nullptr; - V = CmpLHS; C1Log = C1->logBase2(); - IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_EQ; - } else if (IC->getPredicate() == ICmpInst::ICMP_SLT || - IC->getPredicate() == ICmpInst::ICMP_SGT) { - // We also need to recognize (icmp slt (trunc (X)), 0) and - // (icmp sgt (trunc (X)), -1). 
- IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_SGT; - if ((IsEqualZero && !match(CmpRHS, m_AllOnes())) || - (!IsEqualZero && !match(CmpRHS, m_Zero()))) - return nullptr; - - if (!match(CmpLHS, m_OneUse(m_Trunc(m_Value(V))))) + } else { + APInt C1; + if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CmpLHS, C1) || + !C1.isPowerOf2()) return nullptr; - C1Log = CmpLHS->getType()->getScalarSizeInBits() - 1; + C1Log = C1.logBase2(); NeedAnd = true; - } else { - return nullptr; } + Value *Y, *V = CmpLHS; + BinaryOperator *BinOp; const APInt *C2; - bool OrOnTrueVal = false; - bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); - if (!OrOnFalseVal) - OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); - - if (!OrOnFalseVal && !OrOnTrueVal) + bool NeedXor; + if (match(FalseVal, m_BinOp(m_Specific(TrueVal), m_Power2(C2)))) { + Y = TrueVal; + BinOp = cast<BinaryOperator>(FalseVal); + NeedXor = Pred == ICmpInst::ICMP_NE; + } else if (match(TrueVal, m_BinOp(m_Specific(FalseVal), m_Power2(C2)))) { + Y = FalseVal; + BinOp = cast<BinaryOperator>(TrueVal); + NeedXor = Pred == ICmpInst::ICMP_EQ; + } else { return nullptr; + } - Value *Y = OrOnFalseVal ? TrueVal : FalseVal; + // Check that 0 on RHS is identity value for this binop. + auto *IdentityC = + ConstantExpr::getBinOpIdentity(BinOp->getOpcode(), BinOp->getType(), + /*AllowRHSConstant*/ true); + if (IdentityC == nullptr || !IdentityC->isNullValue()) + return nullptr; unsigned C2Log = C2->logBase2(); - bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal); bool NeedShift = C1Log != C2Log; bool NeedZExtTrunc = Y->getType()->getScalarSizeInBits() != V->getType()->getScalarSizeInBits(); // Make sure we don't create more instructions than we save. - Value *Or = OrOnFalseVal ? FalseVal : TrueVal; - if ((NeedShift + NeedXor + NeedZExtTrunc) > - (IC->hasOneUse() + Or->hasOneUse())) + if ((NeedShift + NeedXor + NeedZExtTrunc + NeedAnd) > + (IC->hasOneUse() + BinOp->hasOneUse())) return nullptr; if (NeedAnd) { @@ -788,7 +794,7 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal, if (NeedXor) V = Builder.CreateXor(V, *C2); - return Builder.CreateOr(V, Y); + return Builder.CreateBinOp(BinOp->getOpcode(), Y, V); } /// Canonicalize a set or clear of a masked set of constant bits to @@ -870,7 +876,7 @@ static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) { auto *FalseValI = cast<Instruction>(FalseVal); auto *FrY = IC.InsertNewInstBefore(new FreezeInst(Y, Y->getName() + ".fr"), - *FalseValI); + FalseValI->getIterator()); IC.replaceOperand(*FalseValI, FalseValI->getOperand(0) == Y ? 0 : 1, FrY); return IC.replaceInstUsesWith(SI, FalseValI); } @@ -1303,45 +1309,28 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, return nullptr; // InstSimplify already performed this fold if it was possible subject to - // current poison-generating flags. Try the transform again with - // poison-generating flags temporarily dropped. 
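Backing up to the generalized foldSelectICmpAndBinOp above: one concrete instance with `or` as the binop, C1 = 4 and C2 = 16 (so C3 = log2(16) - log2(4) = 2 and the shift is left), checked exhaustively on i8. This covers values only; the pass additionally insists that 0 is the binop's right identity. Standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 256; ++X)
    for (uint32_t Y = 0; Y < 256; ++Y) {
      uint32_t Sel = ((X & 4) == 0) ? Y : (Y | 16); // original select form
      uint32_t Folded = Y | ((X & 4) << 2);         // branchless rewrite
      assert(Sel == Folded);
    }
  return 0;
}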
- bool WasNUW = false, WasNSW = false, WasExact = false, WasInBounds = false; - if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(FalseVal)) { - WasNUW = OBO->hasNoUnsignedWrap(); - WasNSW = OBO->hasNoSignedWrap(); - FalseInst->setHasNoUnsignedWrap(false); - FalseInst->setHasNoSignedWrap(false); - } - if (auto *PEO = dyn_cast<PossiblyExactOperator>(FalseVal)) { - WasExact = PEO->isExact(); - FalseInst->setIsExact(false); - } - if (auto *GEP = dyn_cast<GetElementPtrInst>(FalseVal)) { - WasInBounds = GEP->isInBounds(); - GEP->setIsInBounds(false); - } + // current poison-generating flags. Check whether dropping poison-generating + // flags enables the transform. // Try each equivalence substitution possibility. // We have an 'EQ' comparison, so the select's false value will propagate. // Example: // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1 + SmallVector<Instruction *> DropFlags; if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ, - /* AllowRefinement */ false) == TrueVal || + /* AllowRefinement */ false, + &DropFlags) == TrueVal || simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ, - /* AllowRefinement */ false) == TrueVal) { + /* AllowRefinement */ false, + &DropFlags) == TrueVal) { + for (Instruction *I : DropFlags) { + I->dropPoisonGeneratingFlagsAndMetadata(); + Worklist.add(I); + } + return replaceInstUsesWith(Sel, FalseVal); } - // Restore poison-generating flags if the transform did not apply. - if (WasNUW) - FalseInst->setHasNoUnsignedWrap(); - if (WasNSW) - FalseInst->setHasNoSignedWrap(); - if (WasExact) - FalseInst->setIsExact(); - if (WasInBounds) - cast<GetElementPtrInst>(FalseInst)->setIsInBounds(); - return nullptr; } @@ -1506,8 +1495,13 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, if (!match(ReplacementLow, m_ImmConstant(LowC)) || !match(ReplacementHigh, m_ImmConstant(HighC))) return nullptr; - ReplacementLow = ConstantExpr::getSExt(LowC, X->getType()); - ReplacementHigh = ConstantExpr::getSExt(HighC, X->getType()); + const DataLayout &DL = Sel0.getModule()->getDataLayout(); + ReplacementLow = + ConstantFoldCastOperand(Instruction::SExt, LowC, X->getType(), DL); + ReplacementHigh = + ConstantFoldCastOperand(Instruction::SExt, HighC, X->getType(), DL); + assert(ReplacementLow && ReplacementHigh && + "Constant folding of ImmConstant cannot fail"); } // All good, finally emit the new pattern. @@ -1797,7 +1791,7 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, if (Instruction *V = foldSelectZeroOrOnes(ICI, TrueVal, FalseVal, Builder)) return V; - if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder)) + if (Value *V = foldSelectICmpAndBinOp(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); if (Value *V = foldSelectICmpLshrAshr(ICI, TrueVal, FalseVal, Builder)) @@ -2094,9 +2088,8 @@ Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) { // If the constant is the same after truncation to the smaller type and // extension to the original type, we can narrow the select. 
Type *SelType = Sel.getType(); - Constant *TruncC = ConstantExpr::getTrunc(C, SmallType); - Constant *ExtC = ConstantExpr::getCast(ExtOpcode, TruncC, SelType); - if (ExtC == C && ExtInst->hasOneUse()) { + Constant *TruncC = getLosslessTrunc(C, SmallType, ExtOpcode); + if (TruncC && ExtInst->hasOneUse()) { Value *TruncCVal = cast<Value>(TruncC); if (ExtInst == Sel.getFalseValue()) std::swap(X, TruncCVal); @@ -2107,23 +2100,6 @@ Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) { return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType); } - // If one arm of the select is the extend of the condition, replace that arm - // with the extension of the appropriate known bool value. - if (Cond == X) { - if (ExtInst == Sel.getTrueValue()) { - // select X, (sext X), C --> select X, -1, C - // select X, (zext X), C --> select X, 1, C - Constant *One = ConstantInt::getTrue(SmallType); - Constant *AllOnesOrOne = ConstantExpr::getCast(ExtOpcode, One, SelType); - return SelectInst::Create(Cond, AllOnesOrOne, C, "", nullptr, &Sel); - } else { - // select X, C, (sext X) --> select X, C, 0 - // select X, C, (zext X) --> select X, C, 0 - Constant *Zero = ConstantInt::getNullValue(SelType); - return SelectInst::Create(Cond, C, Zero, "", nullptr, &Sel); - } - } - return nullptr; } @@ -2561,7 +2537,7 @@ static Instruction *foldSelectToPhiImpl(SelectInst &Sel, BasicBlock *BB, return nullptr; } - Builder.SetInsertPoint(&*BB->begin()); + Builder.SetInsertPoint(BB, BB->begin()); auto *PN = Builder.CreatePHI(Sel.getType(), Inputs.size()); for (auto *Pred : predecessors(BB)) PN->addIncoming(Inputs[Pred], Pred); @@ -2584,6 +2560,61 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT, return nullptr; } +/// Tries to reduce a pattern that arises when calculating the remainder of the +/// Euclidean division. When the divisor is a power of two and is guaranteed not +/// to be negative, a signed remainder can be folded with a bitwise and. +/// +/// (x % n) < 0 ? (x % n) + n : (x % n) +/// -> x & (n - 1) +static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC, + IRBuilderBase &Builder) { + Value *CondVal = SI.getCondition(); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + ICmpInst::Predicate Pred; + Value *Op, *RemRes, *Remainder; + const APInt *C; + bool TrueIfSigned = false; + + if (!(match(CondVal, m_ICmp(Pred, m_Value(RemRes), m_APInt(C))) && + IC.isSignBitCheck(Pred, *C, TrueIfSigned))) + return nullptr; + + // If the sign bit is not set, we have a SGE/SGT comparison, and the operands + // of the select are inverted. 
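The identity behind this fold, before the matching code below: for a power-of-two divisor, the sign-adjusted srem is exactly the low bits of the dividend. A standalone check for n = 8 (assumes the usual two's-complement representation):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t N = 8; // power-of-two divisor
  for (int32_t x = -1000; x <= 1000; ++x) {
    int32_t Rem = x % N;                      // srem: sign follows x
    int32_t Euclid = Rem < 0 ? Rem + N : Rem; // the select pattern
    assert(Euclid == (x & (N - 1)));          // the bitwise-and rewrite
  }
  return 0;
}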
+  if (!TrueIfSigned)
+    std::swap(TrueVal, FalseVal);
+
+  auto FoldToBitwiseAnd = [&](Value *Remainder) -> Instruction * {
+    Value *Add = Builder.CreateAdd(
+        Remainder, Constant::getAllOnesValue(RemRes->getType()));
+    return BinaryOperator::CreateAnd(Op, Add);
+  };
+
+  // Match the general case:
+  // %rem = srem i32 %x, %n
+  // %cnd = icmp slt i32 %rem, 0
+  // %add = add i32 %rem, %n
+  // %sel = select i1 %cnd, i32 %add, i32 %rem
+  if (match(TrueVal, m_Add(m_Value(RemRes), m_Value(Remainder))) &&
+      match(RemRes, m_SRem(m_Value(Op), m_Specific(Remainder))) &&
+      IC.isKnownToBeAPowerOfTwo(Remainder, /*OrZero*/ true) &&
+      FalseVal == RemRes)
+    return FoldToBitwiseAnd(Remainder);
+
+  // Match the case where one arm has been replaced by the constant 1:
+  // %rem = srem i32 %n, 2
+  // %cnd = icmp slt i32 %rem, 0
+  // %sel = select i1 %cnd, i32 1, i32 %rem
+  if (match(TrueVal, m_One()) &&
+      match(RemRes, m_SRem(m_Value(Op), m_SpecificInt(2))) &&
+      FalseVal == RemRes)
+    return FoldToBitwiseAnd(ConstantInt::get(RemRes->getType(), 2));
+
+  return nullptr;
+}
+
 static Value *foldSelectWithFrozenICmp(SelectInst &Sel,
                                        InstCombiner::BuilderTy &Builder) {
   FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());
   if (!FI)
     return nullptr;
@@ -2860,8 +2891,15 @@ static Instruction *foldNestedSelects(SelectInst &OuterSelVal,
     std::swap(InnerSel.TrueVal, InnerSel.FalseVal);
 
   Value *AltCond = nullptr;
-  auto matchOuterCond = [OuterSel, &AltCond](auto m_InnerCond) {
-    return match(OuterSel.Cond, m_c_LogicalOp(m_InnerCond, m_Value(AltCond)));
+  auto matchOuterCond = [OuterSel, IsAndVariant, &AltCond](auto m_InnerCond) {
+    // An unsimplified select condition can match both LogicalAnd and LogicalOr
+    // (select true, true, false). Since below we assume that LogicalAnd implies
+    // that InnerSel matches the FVal, and vice versa for LogicalOr, we can't
+    // match the alternative pattern here.
+    return IsAndVariant ?
match(OuterSel.Cond, + m_c_LogicalAnd(m_InnerCond, m_Value(AltCond))) + : match(OuterSel.Cond, + m_c_LogicalOr(m_InnerCond, m_Value(AltCond))); }; // Finally, match the condition that was driving the outermost `select`, @@ -3024,31 +3062,37 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { if (match(CondVal, m_Select(m_Value(A), m_Value(B), m_Zero())) && match(TrueVal, m_Specific(B)) && match(FalseVal, m_Zero())) return replaceOperand(SI, 0, A); - // select a, (select ~a, true, b), false -> select a, b, false - if (match(TrueVal, m_c_LogicalOr(m_Not(m_Specific(CondVal)), m_Value(B))) && - match(FalseVal, m_Zero())) - return replaceOperand(SI, 1, B); - // select a, true, (select ~a, b, false) -> select a, true, b - if (match(FalseVal, m_c_LogicalAnd(m_Not(m_Specific(CondVal)), m_Value(B))) && - match(TrueVal, m_One())) - return replaceOperand(SI, 2, B); // ~(A & B) & (A | B) --> A ^ B if (match(&SI, m_c_LogicalAnd(m_Not(m_LogicalAnd(m_Value(A), m_Value(B))), m_c_LogicalOr(m_Deferred(A), m_Deferred(B))))) return BinaryOperator::CreateXor(A, B); - // select (~a | c), a, b -> and a, (or c, freeze(b)) - if (match(CondVal, m_c_Or(m_Not(m_Specific(TrueVal)), m_Value(C))) && - CondVal->hasOneUse()) { - FalseVal = Builder.CreateFreeze(FalseVal); - return BinaryOperator::CreateAnd(TrueVal, Builder.CreateOr(C, FalseVal)); + // select (~a | c), a, b -> select a, (select c, true, b), false + if (match(CondVal, + m_OneUse(m_c_Or(m_Not(m_Specific(TrueVal)), m_Value(C))))) { + Value *OrV = Builder.CreateSelect(C, One, FalseVal); + return SelectInst::Create(TrueVal, OrV, Zero); + } + // select (c & b), a, b -> select b, (select ~c, true, a), false + if (match(CondVal, m_OneUse(m_c_And(m_Value(C), m_Specific(FalseVal))))) { + if (Value *NotC = getFreelyInverted(C, C->hasOneUse(), &Builder)) { + Value *OrV = Builder.CreateSelect(NotC, One, TrueVal); + return SelectInst::Create(FalseVal, OrV, Zero); + } + } + // select (a | c), a, b -> select a, true, (select ~c, b, false) + if (match(CondVal, m_OneUse(m_c_Or(m_Specific(TrueVal), m_Value(C))))) { + if (Value *NotC = getFreelyInverted(C, C->hasOneUse(), &Builder)) { + Value *AndV = Builder.CreateSelect(NotC, FalseVal, Zero); + return SelectInst::Create(TrueVal, One, AndV); + } } - // select (~c & b), a, b -> and b, (or freeze(a), c) - if (match(CondVal, m_c_And(m_Not(m_Value(C)), m_Specific(FalseVal))) && - CondVal->hasOneUse()) { - TrueVal = Builder.CreateFreeze(TrueVal); - return BinaryOperator::CreateAnd(FalseVal, Builder.CreateOr(C, TrueVal)); + // select (c & ~b), a, b -> select b, true, (select c, a, false) + if (match(CondVal, + m_OneUse(m_c_And(m_Value(C), m_Not(m_Specific(FalseVal)))))) { + Value *AndV = Builder.CreateSelect(C, TrueVal, Zero); + return SelectInst::Create(FalseVal, One, AndV); } if (match(FalseVal, m_Zero()) || match(TrueVal, m_One())) { @@ -3057,7 +3101,7 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { Value *Op1 = IsAnd ? 
TrueVal : FalseVal; if (isCheckForZeroAndMulWithOverflow(CondVal, Op1, IsAnd, Y)) { auto *FI = new FreezeInst(*Y, (*Y)->getName() + ".fr"); - InsertNewInstBefore(FI, *cast<Instruction>(Y->getUser())); + InsertNewInstBefore(FI, cast<Instruction>(Y->getUser())->getIterator()); replaceUse(*Y, FI); return replaceInstUsesWith(SI, Op1); } @@ -3272,6 +3316,31 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) { Masked); } +bool InstCombinerImpl::fmulByZeroIsZero(Value *MulVal, FastMathFlags FMF, + const Instruction *CtxI) const { + KnownFPClass Known = computeKnownFPClass(MulVal, FMF, fcNegative, CtxI); + + return Known.isKnownNeverNaN() && Known.isKnownNeverInfinity() && + (FMF.noSignedZeros() || Known.signBitIsZeroOrNaN()); +} + +static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0, + Value *Cmp1, Value *TrueVal, + Value *FalseVal, Instruction &CtxI, + bool SelectIsNSZ) { + Value *MulRHS; + if (match(Cmp1, m_PosZeroFP()) && + match(TrueVal, m_c_FMul(m_Specific(Cmp0), m_Value(MulRHS)))) { + FastMathFlags FMF = cast<FPMathOperator>(TrueVal)->getFastMathFlags(); + // nsz must be on the select, it must be ignored on the multiply. We + // need nnan and ninf on the multiply for the other value. + FMF.setNoSignedZeros(SelectIsNSZ); + return IC.fmulByZeroIsZero(MulRHS, FMF, &CtxI); + } + + return false; +} + Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -3303,28 +3372,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { ConstantInt::getFalse(CondType), SQ, /* AllowRefinement */ true)) return replaceOperand(SI, 2, S); - - // Handle patterns involving sext/zext + not explicitly, - // as simplifyWithOpReplaced() only looks past one instruction. - Value *NotCond; - - // select a, sext(!a), b -> select !a, b, 0 - // select a, zext(!a), b -> select !a, b, 0 - if (match(TrueVal, m_ZExtOrSExt(m_CombineAnd(m_Value(NotCond), - m_Not(m_Specific(CondVal)))))) - return SelectInst::Create(NotCond, FalseVal, - Constant::getNullValue(SelType)); - - // select a, b, zext(!a) -> select !a, 1, b - if (match(FalseVal, m_ZExt(m_CombineAnd(m_Value(NotCond), - m_Not(m_Specific(CondVal)))))) - return SelectInst::Create(NotCond, ConstantInt::get(SelType, 1), TrueVal); - - // select a, b, sext(!a) -> select !a, -1, b - if (match(FalseVal, m_SExt(m_CombineAnd(m_Value(NotCond), - m_Not(m_Specific(CondVal)))))) - return SelectInst::Create(NotCond, Constant::getAllOnesValue(SelType), - TrueVal); } if (Instruction *R = foldSelectOfBools(SI)) @@ -3362,7 +3409,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { } } + auto *SIFPOp = dyn_cast<FPMathOperator>(&SI); + if (auto *FCmp = dyn_cast<FCmpInst>(CondVal)) { + FCmpInst::Predicate Pred = FCmp->getPredicate(); Value *Cmp0 = FCmp->getOperand(0), *Cmp1 = FCmp->getOperand(1); // Are we selecting a value based on a comparison of the two values? if ((Cmp0 == TrueVal && Cmp1 == FalseVal) || @@ -3372,7 +3422,7 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { // // e.g. // (X ugt Y) ? X : Y -> (X ole Y) ? Y : X - if (FCmp->hasOneUse() && FCmpInst::isUnordered(FCmp->getPredicate())) { + if (FCmp->hasOneUse() && FCmpInst::isUnordered(Pred)) { FCmpInst::Predicate InvPred = FCmp->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(Builder); // FIXME: The FMF should propagate from the select, not the fcmp. 
@@ -3383,14 +3433,47 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
       return replaceInstUsesWith(SI, NewSel);
     }
   }
+
+  if (SIFPOp) {
+    // Fold out scale-if-equals-zero pattern.
+    //
+    // This pattern appears in code with denormal range checks after it's
+    // assumed denormals are treated as zero. This drops a canonicalization.
+
+    // TODO: Could relax the signed zero logic. We just need to know the sign
+    // of the result matches (fmul x, y has the same sign as x).
+    //
+    // TODO: Handle always-canonicalizing variant that selects some value or 1
+    // scaling factor in the fmul visitor.
+
+    // TODO: Handle ldexp too
+
+    Value *MatchCmp0 = nullptr;
+    Value *MatchCmp1 = nullptr;
+
+    // (select (fcmp [ou]eq x, 0.0), (fmul x, K), x) => x
+    // (select (fcmp [ou]ne x, 0.0), x, (fmul x, K)) => x
+    if (Pred == CmpInst::FCMP_OEQ || Pred == CmpInst::FCMP_UEQ) {
+      MatchCmp0 = FalseVal;
+      MatchCmp1 = TrueVal;
+    } else if (Pred == CmpInst::FCMP_ONE || Pred == CmpInst::FCMP_UNE) {
+      MatchCmp0 = TrueVal;
+      MatchCmp1 = FalseVal;
+    }
+
+    if (Cmp0 == MatchCmp0 &&
+        matchFMulByZeroIfResultEqZero(*this, Cmp0, Cmp1, MatchCmp1, MatchCmp0,
+                                      SI, SIFPOp->hasNoSignedZeros()))
+      return replaceInstUsesWith(SI, Cmp0);
+  }
   }
 
-  if (isa<FPMathOperator>(SI)) {
+  if (SIFPOp) {
     // TODO: Try to forward-propagate FMF from select arms to the select.
 
     // Canonicalize select of FP values where NaN and -0.0 are not valid as
     // minnum/maxnum intrinsics.
-    if (SI.hasNoNaNs() && SI.hasNoSignedZeros()) {
+    if (SIFPOp->hasNoNaNs() && SIFPOp->hasNoSignedZeros()) {
       Value *X, *Y;
       if (match(&SI, m_OrdFMax(m_Value(X), m_Value(Y))))
         return replaceInstUsesWith(
@@ -3430,6 +3513,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
   if (Instruction *I = foldSelectExtConst(SI))
     return I;
 
+  if (Instruction *I = foldSelectWithSRem(SI, *this, Builder))
+    return I;
+
   // Fold (select C, (gep Ptr, Idx), Ptr) -> (gep Ptr, (select C, Idx, 0))
   // Fold (select C, Ptr, (gep Ptr, Idx)) -> (gep Ptr, (select C, 0, Idx))
   auto SelectGepWithBase = [&](GetElementPtrInst *Gep, Value *Base,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 89dad455f015..b7958978c450 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -136,9 +136,14 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
   assert(IdenticalShOpcodes && "Should not get here with different shifts.");
 
-  // All good, we can do this fold.
-  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType());
+  if (NewShAmt->getType() != X->getType()) {
+    NewShAmt = ConstantFoldCastOperand(Instruction::ZExt, NewShAmt,
+                                       X->getType(), SQ.DL);
+    if (!NewShAmt)
+      return nullptr;
+  }
 
+  // All good, we can do this fold.
   BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);
 
   // The flags can only be propagated if there wasn't a trunc.
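Returning to the scale-if-equals-zero select fold above: the nnan/ninf requirement on the multiply is what guarantees x * K == x when x is zero, since an infinite or NaN factor would break it. A minimal illustration (standalone, not part of the patch):

#include <cassert>
#include <cmath>

int main() {
  double K = 42.5; // stands in for a known-finite, non-NaN scale factor
  double x = 0.0;
  double Sel = (x == 0.0) ? x * K : x; // the select being folded away
  assert(Sel == x);                    // 0.0 * K == 0.0 for finite K
  // Why ninf/nnan are required: an infinite factor turns zero into NaN.
  assert(std::isnan(0.0 * INFINITY));
  return 0;
}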
@@ -245,7 +250,11 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, SumOfShAmts = Constant::replaceUndefsWith( SumOfShAmts, ConstantInt::get(SumOfShAmts->getType()->getScalarType(), ExtendedTy->getScalarSizeInBits())); - auto *ExtendedSumOfShAmts = ConstantExpr::getZExt(SumOfShAmts, ExtendedTy); + auto *ExtendedSumOfShAmts = ConstantFoldCastOperand( + Instruction::ZExt, SumOfShAmts, ExtendedTy, Q.DL); + if (!ExtendedSumOfShAmts) + return nullptr; + // And compute the mask as usual: ~(-1 << (SumOfShAmts)) auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); auto *ExtendedInvertedMask = @@ -278,16 +287,22 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, ShAmtsDiff = Constant::replaceUndefsWith( ShAmtsDiff, ConstantInt::get(ShAmtsDiff->getType()->getScalarType(), -WidestTyBitWidth)); - auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt( + auto *ExtendedNumHighBitsToClear = ConstantFoldCastOperand( + Instruction::ZExt, ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(), WidestTyBitWidth, /*isSigned=*/false), ShAmtsDiff), - ExtendedTy); + ExtendedTy, Q.DL); + if (!ExtendedNumHighBitsToClear) + return nullptr; + // And compute the mask as usual: (-1 l>> (NumHighBitsToClear)) auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); - NewMask = - ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear); + NewMask = ConstantFoldBinaryOpOperands(Instruction::LShr, ExtendedAllOnes, + ExtendedNumHighBitsToClear, Q.DL); + if (!NewMask) + return nullptr; } else return nullptr; // Don't know anything about this pattern. @@ -545,8 +560,8 @@ static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl, /// this succeeds, getShiftedValue() will be called to produce the value. static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift, InstCombinerImpl &IC, Instruction *CxtI) { - // We can always evaluate constants shifted. - if (isa<Constant>(V)) + // We can always evaluate immediate constants. + if (match(V, m_ImmConstant())) return true; Instruction *I = dyn_cast<Instruction>(V); @@ -709,13 +724,13 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::Mul: { assert(!isLeftShift && "Unexpected shift direction!"); auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0)); - IC.InsertNewInstWith(Neg, *I); + IC.InsertNewInstWith(Neg, I->getIterator()); unsigned TypeWidth = I->getType()->getScalarSizeInBits(); APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits); auto *And = BinaryOperator::CreateAnd(Neg, ConstantInt::get(I->getType(), Mask)); And->takeName(I); - return IC.InsertNewInstWith(And, *I); + return IC.InsertNewInstWith(And, I->getIterator()); } } } @@ -745,7 +760,7 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *C1, // (C2 >> X) >> C1 --> (C2 >> C1) >> X Constant *C2; Value *X; - if (match(Op0, m_BinOp(I.getOpcode(), m_Constant(C2), m_Value(X)))) + if (match(Op0, m_BinOp(I.getOpcode(), m_ImmConstant(C2), m_Value(X)))) return BinaryOperator::Create( I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), C2, C1), X); @@ -928,6 +943,60 @@ Instruction *InstCombinerImpl::foldLShrOverflowBit(BinaryOperator &I) { return new ZExtInst(Overflow, Ty); } +// Try to set nuw/nsw flags on shl or exact flag on lshr/ashr using knownbits. +static bool setShiftFlags(BinaryOperator &I, const SimplifyQuery &Q) { + assert(I.isShift() && "Expected a shift as input"); + // We already have all the flags. 
+ if (I.getOpcode() == Instruction::Shl) {
+ if (I.hasNoUnsignedWrap() && I.hasNoSignedWrap())
+ return false;
+ } else {
+ if (I.isExact())
+ return false;
+
+ // shr (shl X, Y), Y
+ if (match(I.getOperand(0), m_Shl(m_Value(), m_Specific(I.getOperand(1))))) {
+ I.setIsExact();
+ return true;
+ }
+ }
+
+ // Compute what we know about shift count.
+ KnownBits KnownCnt = computeKnownBits(I.getOperand(1), /* Depth */ 0, Q);
+ unsigned BitWidth = KnownCnt.getBitWidth();
+ // Since shift produces a poison value if RHS is equal to or larger than the
+ // bit width, we can safely assume that RHS is less than the bit width.
+ uint64_t MaxCnt = KnownCnt.getMaxValue().getLimitedValue(BitWidth - 1);
+
+ KnownBits KnownAmt = computeKnownBits(I.getOperand(0), /* Depth */ 0, Q);
+ bool Changed = false;
+
+ if (I.getOpcode() == Instruction::Shl) {
+ // If we have at least as many leading zeros as the maximum shift count, we
+ // have nuw.
+ if (!I.hasNoUnsignedWrap() && MaxCnt <= KnownAmt.countMinLeadingZeros()) {
+ I.setHasNoUnsignedWrap();
+ Changed = true;
+ }
+ // If we have more sign bits than the maximum shift count, we have nsw.
+ if (!I.hasNoSignedWrap()) {
+ if (MaxCnt < KnownAmt.countMinSignBits() ||
+ MaxCnt < ComputeNumSignBits(I.getOperand(0), Q.DL, /*Depth*/ 0, Q.AC,
+ Q.CxtI, Q.DT)) {
+ I.setHasNoSignedWrap();
+ Changed = true;
+ }
+ }
+ return Changed;
+ }
+
+ // If we have at least as many trailing zeros as the maximum shift count,
+ // then the shift is exact.
+ Changed = MaxCnt <= KnownAmt.countMinTrailingZeros();
+ I.setIsExact(Changed);
+
+ return Changed;
+}
+
Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -976,7 +1045,11 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// If C1 < C: (X >>?,exact C1) << C --> X << (C - C1)
Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
- NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoUnsignedWrap(
+ I.hasNoUnsignedWrap() ||
+ (ShrAmt &&
+ cast<Instruction>(Op0)->getOpcode() == Instruction::LShr &&
+ I.hasNoSignedWrap()));
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
return NewShl;
}
@@ -997,7 +1070,11 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// If C1 < C: (X >>? C1) << C --> (X << (C - C1)) & (-1 << C)
Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
- NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoUnsignedWrap(
+ I.hasNoUnsignedWrap() ||
+ (ShrAmt &&
+ cast<Instruction>(Op0)->getOpcode() == Instruction::LShr &&
+ I.hasNoSignedWrap()));
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
Builder.Insert(NewShl);
APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
@@ -1108,22 +1185,11 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
Value *NewShift = Builder.CreateShl(X, Op1);
return BinaryOperator::CreateSub(NewLHS, NewShift);
}
-
- // If the shifted-out value is known-zero, then this is a NUW shift.
- if (!I.hasNoUnsignedWrap() &&
- MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, ShAmtC), 0,
- &I)) {
- I.setHasNoUnsignedWrap();
- return &I;
- }
-
- // If the shifted-out value is all signbits, then this is a NSW shift.
- if (!I.hasNoSignedWrap() && ComputeNumSignBits(Op0, 0, &I) > ShAmtC) {
- I.setHasNoSignedWrap();
- return &I;
- }
}
+ if (setShiftFlags(I, Q))
+ return &I;
+
// Transform (x >> y) << y to x & (-1 << y)
// Valid for any type of right-shift.
Value *X; @@ -1161,15 +1227,6 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { Value *NegX = Builder.CreateNeg(X, "neg"); return BinaryOperator::CreateAnd(NegX, X); } - - // The only way to shift out the 1 is with an over-shift, so that would - // be poison with or without "nuw". Undef is excluded because (undef << X) - // is not undef (it is zero). - Constant *ConstantOne = cast<Constant>(Op0); - if (!I.hasNoUnsignedWrap() && !ConstantOne->containsUndefElement()) { - I.setHasNoUnsignedWrap(); - return &I; - } } return nullptr; @@ -1235,9 +1292,10 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { unsigned ShlAmtC = C1->getZExtValue(); Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmtC - ShAmtC); if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) { - // (X <<nuw C1) >>u C --> X <<nuw (C1 - C) + // (X <<nuw C1) >>u C --> X <<nuw/nsw (C1 - C) auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff); NewShl->setHasNoUnsignedWrap(true); + NewShl->setHasNoSignedWrap(ShAmtC > 0); return NewShl; } if (Op0->hasOneUse()) { @@ -1370,12 +1428,13 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { if (Op0->hasOneUse()) { APInt NewMulC = MulC->lshr(ShAmtC); // if c is divisible by (1 << ShAmtC): - // lshr (mul nuw x, MulC), ShAmtC -> mul nuw x, (MulC >> ShAmtC) + // lshr (mul nuw x, MulC), ShAmtC -> mul nuw nsw x, (MulC >> ShAmtC) if (MulC->eq(NewMulC.shl(ShAmtC))) { auto *NewMul = BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC)); - BinaryOperator *OrigMul = cast<BinaryOperator>(Op0); - NewMul->setHasNoSignedWrap(OrigMul->hasNoSignedWrap()); + assert(ShAmtC != 0 && + "lshr X, 0 should be handled by simplifyLShrInst."); + NewMul->setHasNoSignedWrap(true); return NewMul; } } @@ -1414,15 +1473,12 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { Value *And = Builder.CreateAnd(BoolX, BoolY); return new ZExtInst(And, Ty); } - - // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && - MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmtC), 0, &I)) { - I.setIsExact(); - return &I; - } } + const SimplifyQuery Q = SQ.getWithInstruction(&I); + if (setShiftFlags(I, Q)) + return &I; + // Transform (x << y) >> y to x & (-1 >> y) if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) { Constant *AllOnes = ConstantInt::getAllOnesValue(Ty); @@ -1581,15 +1637,12 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) { if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y))))) return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty); } - - // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && - MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) { - I.setIsExact(); - return &I; - } } + const SimplifyQuery Q = SQ.getWithInstruction(&I); + if (setShiftFlags(I, Q)) + return &I; + // Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)` // as the pattern to splat the lowest bit. // FIXME: iff X is already masked, we don't need the one-use check. 
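The new `setShiftFlags` helper in this file derives flags from known bits: `shl` gets `nuw` when the operand has at least as many leading zeros as the largest possible shift amount, `nsw` when it has strictly more sign bits, and `lshr`/`ashr` get `exact` when there are at least that many trailing zeros. A brute-force C++ check of those three invariants at bit width 8 (a sketch of the math, not the LLVM implementation; it assumes two's-complement wrapping and arithmetic right shift, which C++20 guarantees):

```cpp
#include <cassert>
#include <cstdint>

static int leadingZeros(uint8_t V) {
  int N = 0;
  for (int B = 7; B >= 0 && !((V >> B) & 1); --B) ++N;
  return N;
}
static int trailingZeros(uint8_t V) {
  int N = 0;
  for (int B = 0; B < 8 && !((V >> B) & 1); ++B) ++N;
  return N;
}
// Leading bits equal to the sign bit, including the sign bit itself.
static int signBits(uint8_t V) {
  int S = (V >> 7) & 1;
  int N = 0;
  for (int B = 7; B >= 0 && ((V >> B) & 1) == S; --B) ++N;
  return N;
}

int main() {
  for (int X = 0; X < 256; ++X)
    for (int C = 0; C < 8; ++C) { // shift amounts >= width would be poison
      uint8_t V = (uint8_t)X;
      // nuw: enough leading zeros means shl pushes out no set bits.
      if (C <= leadingZeros(V)) {
        uint8_t Wrapped = (uint8_t)(V << C);
        assert((uint8_t)(Wrapped >> C) == V);
      }
      // nsw: strictly more sign bits than the shift count means the signed
      // value survives; an arithmetic shift right undoes the shl.
      if (C < signBits(V)) {
        int8_t Orig = (int8_t)V;
        int8_t Shifted = (int8_t)(uint8_t)(V << C);
        assert((int8_t)(Shifted >> C) == Orig);
      }
      // exact: enough trailing zeros means the right shift drops only zeros.
      if (C <= trailingZeros(V))
        assert((uint8_t)((uint8_t)(V >> C) << C) == V);
    }
}
```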
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 00eece9534b0..046ce9d1207e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -24,6 +24,12 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "instcombine" +static cl::opt<bool> + VerifyKnownBits("instcombine-verify-known-bits", + cl::desc("Verify that computeKnownBits() and " + "SimplifyDemandedBits() are consistent"), + cl::Hidden, cl::init(false)); + /// Check to see if the specified operand of the specified instruction is a /// constant integer. If so, check to see if there are any bits set in the /// constant that are not demanded. If so, shrink the constant and return true. @@ -48,15 +54,20 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, return true; } +/// Returns the bitwidth of the given scalar or pointer type. For vector types, +/// returns the element type's bitwidth. +static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { + if (unsigned BitWidth = Ty->getScalarSizeInBits()) + return BitWidth; + return DL.getPointerTypeSizeInBits(Ty); +} /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if /// the instruction has any properties that allow us to simplify its operands. -bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { - unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - APInt DemandedMask(APInt::getAllOnes(BitWidth)); - +bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst, + KnownBits &Known) { + APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth())); Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known, 0, &Inst); if (!V) return false; @@ -65,6 +76,13 @@ bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { return true; } +/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if +/// the instruction has any properties that allow us to simplify its operands. +bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { + KnownBits Known(getBitWidth(Inst.getType(), DL)); + return SimplifyDemandedInstructionBits(Inst, Known); +} + /// This form of SimplifyDemandedBits simplifies the specified instruction /// operand if possible, updating it in place. It returns true if it made any /// change and false otherwise. @@ -95,8 +113,8 @@ bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo, /// expression. /// Known.One and Known.Zero always follow the invariant that: /// Known.One & Known.Zero == 0. -/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and -/// Known.Zero may only be accurate for those bits set in DemandedMask. Note +/// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero +/// are accurate even for bits not in DemandedMask. Note /// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all /// be the same. /// @@ -143,7 +161,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI); KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); - // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedMask to all bits so that we can try to simplify the // operands. 
This allows visitTruncInst (for example) to simplify the
@@ -196,7 +213,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
- Depth, DL, &AC, CxtI, &DT);
+ Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -220,13 +237,16 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If either the LHS or the RHS are One, the result is One.
if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
- Depth + 1))
+ Depth + 1)) {
+ // Disjoint flag may no longer hold.
+ I->dropPoisonGeneratingFlags();
return I;
+ }
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
- Depth, DL, &AC, CxtI, &DT);
+ Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -244,6 +264,16 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
+ // Infer disjoint flag if no common bits are set.
+ if (!cast<PossiblyDisjointInst>(I)->isDisjoint()) {
+ WithCache<const Value *> LHSCache(I->getOperand(0), LHSKnown),
+ RHSCache(I->getOperand(1), RHSKnown);
+ if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ.getWithInstruction(I))) {
+ cast<PossiblyDisjointInst>(I)->setIsDisjoint(true);
+ return I;
+ }
+ }
+
break;
}
case Instruction::Xor: {
@@ -265,7 +295,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
- Depth, DL, &AC, CxtI, &DT);
+ Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -284,9 +314,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
Instruction *Or =
- BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
- I->getName());
- return InsertNewInstWith(Or, *I);
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1));
+ if (DemandedMask.isAllOnes())
+ cast<PossiblyDisjointInst>(Or)->setIsDisjoint(true);
+ Or->takeName(I);
+ return InsertNewInstWith(Or, I->getIterator());
}
// If all of the demanded bits on one side are known, and all of the set
@@ -298,7 +330,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Constant *AndC = Constant::getIntegerValue(VTy, ~RHSKnown.One & DemandedMask);
Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
- return InsertNewInstWith(And, *I);
+ return InsertNewInstWith(And, I->getIterator());
}
// If the RHS is a constant, see if we can change it.
Don't alter a -1 @@ -330,11 +362,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Constant *AndC = ConstantInt::get(VTy, NewMask & AndRHS->getValue()); Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); - InsertNewInstWith(NewAnd, *I); + InsertNewInstWith(NewAnd, I->getIterator()); Constant *XorC = ConstantInt::get(VTy, NewMask & XorRHS->getValue()); Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); - return InsertNewInstWith(NewXor, *I); + return InsertNewInstWith(NewXor, I->getIterator()); } } break; @@ -411,36 +443,21 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth); KnownBits InputKnown(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1)) + if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1)) { + // For zext nneg, we may have dropped the instruction which made the + // input non-negative. + I->dropPoisonGeneratingFlags(); return I; + } assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?"); + if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() && + !InputKnown.isNegative()) + InputKnown.makeNonNegative(); Known = InputKnown.zextOrTrunc(BitWidth); - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - break; - } - case Instruction::BitCast: - if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) - return nullptr; // vector->int or fp->int? - - if (auto *DstVTy = dyn_cast<VectorType>(VTy)) { - if (auto *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { - if (isa<ScalableVectorType>(DstVTy) || - isa<ScalableVectorType>(SrcVTy) || - cast<FixedVectorType>(DstVTy)->getNumElements() != - cast<FixedVectorType>(SrcVTy)->getNumElements()) - // Don't touch a bitcast between vectors of different element counts. - return nullptr; - } else - // Don't touch a scalar-to-vector bitcast. - return nullptr; - } else if (I->getOperand(0)->getType()->isVectorTy()) - // Don't touch a vector-to-scalar bitcast. - return nullptr; - if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) - return I; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; + } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); @@ -461,8 +478,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (InputKnown.isNonNegative() || DemandedMask.getActiveBits() <= SrcBitWidth) { // Convert to ZExt cast. 
- CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
- return InsertNewInstWith(NewCast, *I);
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy);
+ NewCast->takeName(I);
+ return InsertNewInstWith(NewCast, I->getIterator());
}
// If the sign bit of the input is known set or clear, then we know the
@@ -586,7 +604,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (match(I->getOperand(1), m_APInt(C)) && C->countr_zero() == CTZ) {
Constant *ShiftC = ConstantInt::get(VTy, CTZ);
Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
- return InsertNewInstWith(Shl, *I);
+ return InsertNewInstWith(Shl, I->getIterator());
}
}
// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
@@ -595,7 +613,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (I->getOperand(0) == I->getOperand(1) && DemandedMask.ult(4)) {
Constant *One = ConstantInt::get(VTy, 1);
Instruction *And1 = BinaryOperator::CreateAnd(I->getOperand(0), One);
- return InsertNewInstWith(And1, *I);
+ return InsertNewInstWith(And1, I->getIterator());
}
computeKnownBits(I, Known, Depth, CxtI);
@@ -624,10 +642,12 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (DemandedMask.countr_zero() >= ShiftAmt &&
match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) {
Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
- Constant *NewC = ConstantExpr::getShl(C, LeftShiftAmtC);
- if (ConstantExpr::getLShr(NewC, LeftShiftAmtC) == C) {
+ Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C,
+ LeftShiftAmtC, DL);
+ if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC, LeftShiftAmtC,
+ DL) == C) {
Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X);
- return InsertNewInstWith(Lshr, *I);
+ return InsertNewInstWith(Lshr, I->getIterator());
}
}
@@ -688,24 +708,23 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Constant *C;
if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
- Constant *NewC = ConstantExpr::getLShr(C, RightShiftAmtC);
- if (ConstantExpr::getShl(NewC, RightShiftAmtC) == C) {
+ Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::LShr, C,
+ RightShiftAmtC, DL);
+ if (ConstantFoldBinaryOpOperands(Instruction::Shl, NewC,
+ RightShiftAmtC, DL) == C) {
Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
- return InsertNewInstWith(Shl, *I);
+ return InsertNewInstWith(Shl, I->getIterator());
}
}
}
// Unsigned shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
-
- // If the shift is exact, then it does demand the low bits (and knows that
- // they are zero).
- if (cast<LShrOperator>(I)->isExact())
- DemandedMaskIn.setLowBits(ShiftAmt);
-
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) {
+ // exact flag may no longer hold.
+ I->dropPoisonGeneratingFlags();
return I;
+ }
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShiftAmt);
Known.One.lshrInPlace(ShiftAmt);
@@ -733,7 +752,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Perform the logical shift right.
Instruction *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), I->getOperand(1), I->getName());
- return InsertNewInstWith(NewVal, *I);
+ return InsertNewInstWith(NewVal, I->getIterator());
}
const APInt *SA;
@@ -747,13 +766,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (DemandedMask.countl_zero() <= ShiftAmt)
DemandedMaskIn.setSignBit();
- // If the shift is exact, then it does demand the low bits (and knows that
- // they are zero).
- if (cast<AShrOperator>(I)->isExact())
- DemandedMaskIn.setLowBits(ShiftAmt);
-
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) {
+ // exact flag may no longer hold.
+ I->dropPoisonGeneratingFlags();
return I;
+ }
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now plus sign bits.
@@ -770,7 +787,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
I->getOperand(1));
LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
- return InsertNewInstWith(LShr, *I);
+ LShr->takeName(I);
+ return InsertNewInstWith(LShr, I->getIterator());
} else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
Known.One |= HighBits;
}
@@ -867,7 +885,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
match(II->getArgOperand(0), m_Not(m_Value(X)))) {
Function *Ctpop = Intrinsic::getDeclaration(
II->getModule(), Intrinsic::ctpop, VTy);
- return InsertNewInstWith(CallInst::Create(Ctpop, {X}), *I);
+ return InsertNewInstWith(CallInst::Create(Ctpop, {X}), I->getIterator());
}
break;
}
@@ -894,10 +912,52 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
NewVal = BinaryOperator::CreateShl(
II->getArgOperand(0), ConstantInt::get(VTy, NTZ - NLZ));
NewVal->takeName(I);
- return InsertNewInstWith(NewVal, *I);
+ return InsertNewInstWith(NewVal, I->getIterator());
}
break;
}
+ case Intrinsic::ptrmask: {
+ unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits();
+ RHSKnown = KnownBits(MaskWidth);
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(
+ I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth),
+ RHSKnown, Depth + 1))
+ return I;
+
+ // TODO: Should be 1-extend
+ RHSKnown = RHSKnown.anyextOrTrunc(BitWidth);
+ assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
+ assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
+
+ Known = LHSKnown & RHSKnown;
+ KnownBitsComputed = true;
+
+ // If the client is only demanding bits we know to be zero, return
+ // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
+ // provenance, but making the mask zero will be easily optimizable in
+ // the backend.
+ if (DemandedMask.isSubsetOf(Known.Zero) &&
+ !match(I->getOperand(1), m_Zero()))
+ return replaceOperand(
+ *I, 1, Constant::getNullValue(I->getOperand(1)->getType()));
+
+ // Mask in demanded space does nothing.
+ // NOTE: We may have attributes associated with the return value of the
+ // llvm.ptrmask intrinsic that will be lost when we just return the
+ // operand. We should try to preserve them.
+ if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return I->getOperand(0);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant( + I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth))) + return I; + + break; + } + case Intrinsic::fshr: case Intrinsic::fshl: { const APInt *SA; @@ -918,7 +978,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1)) return I; } else { // fshl is a rotate - // Avoid converting rotate into funnel shift. + // Avoid converting rotate into funnel shift. // Only simplify if one operand is constant. LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, I); if (DemandedMaskLHS.isSubsetOf(LHSKnown.Zero | LHSKnown.One) && @@ -982,10 +1042,29 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } } + if (V->getType()->isPointerTy()) { + Align Alignment = V->getPointerAlignment(DL); + Known.Zero.setLowBits(Log2(Alignment)); + } + // If the client is only demanding bits that we know, return the known - // constant. - if (DemandedMask.isSubsetOf(Known.Zero|Known.One)) + // constant. We can't directly simplify pointers as a constant because of + // pointer provenance. + // TODO: We could return `(inttoptr const)` for pointers. + if (!V->getType()->isPointerTy() && DemandedMask.isSubsetOf(Known.Zero | Known.One)) return Constant::getIntegerValue(VTy, Known.One); + + if (VerifyKnownBits) { + KnownBits ReferenceKnown = computeKnownBits(V, Depth, CxtI); + if (Known != ReferenceKnown) { + errs() << "Mismatched known bits for " << *V << " in " + << I->getFunction()->getName() << "\n"; + errs() << "computeKnownBits(): " << ReferenceKnown << "\n"; + errs() << "SimplifyDemandedBits(): " << Known << "\n"; + std::abort(); + } + } + return nullptr; } @@ -1009,8 +1088,9 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( case Instruction::And: { computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); - Known = LHSKnown & RHSKnown; - computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); + Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown, + Depth, SQ.getWithInstruction(CxtI)); + computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI)); // If the client is only demanding bits that we know, return the known // constant. @@ -1029,8 +1109,9 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( case Instruction::Or: { computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); - Known = LHSKnown | RHSKnown; - computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); + Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown, + Depth, SQ.getWithInstruction(CxtI)); + computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI)); // If the client is only demanding bits that we know, return the known // constant. 
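Several hunks above replace hand-rolled `KnownBits` merging with `analyzeKnownBitsFromAndXorOr`, and the new `-instcombine-verify-known-bits` option cross-checks `SimplifyDemandedBits()` against `computeKnownBits()`. The combination rules themselves are small; below is a toy 8-bit C++ model (not the LLVM `KnownBits` class) that brute-force checks them, along with the no-common-bits fact behind the new `disjoint` inference on `or` earlier in this file's diff:

```cpp
#include <cassert>
#include <cstdint>

struct Known { uint8_t Zero = 0, One = 0; }; // bits known 0 / known 1

// Combination rules for bitwise ops on known bits:
static Known knownAnd(Known A, Known B) {
  return {(uint8_t)(A.Zero | B.Zero), (uint8_t)(A.One & B.One)};
}
static Known knownOr(Known A, Known B) {
  return {(uint8_t)(A.Zero & B.Zero), (uint8_t)(A.One | B.One)};
}
static Known knownXor(Known A, Known B) {
  uint8_t Z = (A.Zero & B.Zero) | (A.One & B.One); // equal known bits -> 0
  uint8_t O = (A.Zero & B.One) | (A.One & B.Zero); // differing known bits -> 1
  return {Z, O};
}

// True if concrete value V is consistent with partial knowledge K.
static bool matches(uint8_t V, Known K) {
  return (V & K.Zero) == 0 && (V & K.One) == K.One;
}

int main() {
  // Fix some arbitrary partial knowledge, then check every consistent pair.
  Known KA{/*Zero=*/0b00001100, /*One=*/0b10000000};
  Known KB{/*Zero=*/0b01000001, /*One=*/0b00000010};
  for (int A = 0; A < 256; ++A) {
    if (!matches((uint8_t)A, KA)) continue;
    for (int B = 0; B < 256; ++B) {
      if (!matches((uint8_t)B, KB)) continue;
      assert(matches((uint8_t)(A & B), knownAnd(KA, KB)));
      assert(matches((uint8_t)(A | B), knownOr(KA, KB)));
      assert(matches((uint8_t)(A ^ B), knownXor(KA, KB)));
      // With no common set bits, | and + agree, which is what the
      // setIsDisjoint() inference on `or` relies on.
      if ((A & B) == 0)
        assert((uint8_t)(A | B) == (uint8_t)(A + B));
    }
  }
}
```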
@@ -1051,8 +1132,9 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( case Instruction::Xor: { computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); - Known = LHSKnown ^ RHSKnown; - computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); + Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown, + Depth, SQ.getWithInstruction(CxtI)); + computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI)); // If the client is only demanding bits that we know, return the known // constant. @@ -1085,7 +1167,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); Known = KnownBits::computeForAddSub(/*Add*/ true, NSW, LHSKnown, RHSKnown); - computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); + computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI)); break; } case Instruction::Sub: { @@ -1101,7 +1183,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); Known = KnownBits::computeForAddSub(/*Add*/ false, NSW, LHSKnown, RHSKnown); - computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI)); + computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI)); break; } case Instruction::AShr: { @@ -1219,7 +1301,7 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits( New->setIsExact(true); } - return InsertNewInstWith(New, *Shl); + return InsertNewInstWith(New, Shl->getIterator()); } return nullptr; @@ -1549,7 +1631,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, Instruction *New = InsertElementInst::Create( Op, Value, ConstantInt::get(Type::getInt64Ty(I->getContext()), Idx), Shuffle->getName()); - InsertNewInstWith(New, *Shuffle); + InsertNewInstWith(New, Shuffle->getIterator()); return New; } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 4a5ffef2b08e..c8b58c51d4e6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -132,7 +132,7 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, // Create a scalar PHI node that will replace the vector PHI node // just before the current PHI node. PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( - PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); + PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), PN->getIterator())); // Scalarize each PHI operand. 
for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { Value *PHIInVal = PN->getIncomingValue(i); @@ -148,10 +148,10 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, Value *Op = InsertNewInstWith( ExtractElementInst::Create(B0->getOperand(opId), Elt, B0->getOperand(opId)->getName() + ".Elt"), - *B0); + B0->getIterator()); Value *newPHIUser = InsertNewInstWith( BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(), - scalarPHI, Op, B0), *B0); + scalarPHI, Op, B0), B0->getIterator()); scalarPHI->addIncoming(newPHIUser, inBB); } else { // Scalarize PHI input: @@ -165,7 +165,7 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, InsertPos = inBB->getFirstInsertionPt(); } - InsertNewInstWith(newEI, *InsertPos); + InsertNewInstWith(newEI, InsertPos); scalarPHI->addIncoming(newEI, inBB); } @@ -441,7 +441,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { if (IndexC->getValue().getActiveBits() <= BitWidth) Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth)); else - Idx = UndefValue::get(Ty); + Idx = PoisonValue::get(Ty); return replaceInstUsesWith(EI, Idx); } } @@ -742,7 +742,7 @@ static bool replaceExtractElements(InsertElementInst *InsElt, if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) WideVec->insertAfter(ExtVecOpInst); else - IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); + IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt()); // Replace extracts from the original narrow vector with extracts from the new // wide vector. @@ -751,7 +751,7 @@ static bool replaceExtractElements(InsertElementInst *InsElt, if (!OldExt || OldExt->getParent() != WideVec->getParent()) continue; auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); - IC.InsertNewInstWith(NewExt, *OldExt); + IC.InsertNewInstWith(NewExt, OldExt->getIterator()); IC.replaceInstUsesWith(*OldExt, NewExt); // Add the old extracts to the worklist for DCE. We can't remove the // extracts directly, because they may still be used by the calling code. @@ -1121,7 +1121,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // Note that the same block can be a predecessor more than once, // and we need to preserve that invariant for the PHI node. BuilderTy::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(UseBB->getFirstNonPHI()); + Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt()); auto *PHI = Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged"); for (BasicBlock *Pred : Preds) @@ -2122,8 +2122,8 @@ static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) { NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i]; // A select mask with undef elements might look like an identity mask. - assert((ShuffleVectorInst::isSelectMask(NewMask) || - ShuffleVectorInst::isIdentityMask(NewMask)) && + assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) || + ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) && "Unexpected shuffle mask"); return new ShuffleVectorInst(X, Y, NewMask); } @@ -2197,9 +2197,9 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf, !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0) return nullptr; - // Insert into element 0 of an undef vector. - UndefValue *UndefVec = UndefValue::get(Shuf.getType()); - Value *NewIns = Builder.CreateInsertElement(UndefVec, X, (uint64_t)0); + // Insert into element 0 of a poison vector. 
+ PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType()); + Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0); // Splat from element 0. Any mask element that is undefined remains undefined. // For example: diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index afd6e034f46d..f072f5cec309 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -130,13 +130,6 @@ STATISTIC(NumReassoc , "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); -// FIXME: these limits eventually should be as low as 2. -#ifndef NDEBUG -static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100; -#else -static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000; -#endif - static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true)); @@ -145,12 +138,6 @@ static cl::opt<unsigned> MaxSinkNumUsers( "instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking")); -static cl::opt<unsigned> InfiniteLoopDetectionThreshold( - "instcombine-infinite-loop-threshold", - cl::desc("Number of instruction combining iterations considered an " - "infinite loop"), - cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden); - static cl::opt<unsigned> MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine")); @@ -358,15 +345,19 @@ static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, // Fold the constants together in the destination type: // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC) + const DataLayout &DL = IC.getDataLayout(); Type *DestTy = C1->getType(); - Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy); - Constant *FoldedC = - ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, IC.getDataLayout()); + Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL); + if (!CastC2) + return false; + Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL); if (!FoldedC) return false; IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0)); IC.replaceOperand(*BinOp1, 1, FoldedC); + BinOp1->dropPoisonGeneratingFlags(); + Cast->dropPoisonGeneratingFlags(); return true; } @@ -542,12 +533,12 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { BinaryOperator::Create(Opcode, A, B); if (isa<FPMathOperator>(NewBO)) { - FastMathFlags Flags = I.getFastMathFlags(); - Flags &= Op0->getFastMathFlags(); - Flags &= Op1->getFastMathFlags(); - NewBO->setFastMathFlags(Flags); + FastMathFlags Flags = I.getFastMathFlags() & + Op0->getFastMathFlags() & + Op1->getFastMathFlags(); + NewBO->setFastMathFlags(Flags); } - InsertNewInstWith(NewBO, I); + InsertNewInstWith(NewBO, I.getIterator()); NewBO->takeName(Op1); replaceOperand(I, 0, NewBO); replaceOperand(I, 1, CRes); @@ -749,7 +740,16 @@ static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, // 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`). 
// // -> (BinOp (logic_shift (BinOp X, Y)), Mask) +// +// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt)) +// IFF +// 1) Binop1 is bitwise logical operator `and`, `or` or `xor` +// 2) Binop2 is `not` +// +// -> (arithmetic_shift Binop1((not X), Y), Amt) + Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { + const DataLayout &DL = I.getModule()->getDataLayout(); auto IsValidBinOpc = [](unsigned Opc) { switch (Opc) { default: @@ -768,11 +768,13 @@ Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { // constraints. auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2, unsigned ShOpc) { + assert(ShOpc != Instruction::AShr); return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) || ShOpc == Instruction::Shl; }; auto GetInvShift = [](unsigned ShOpc) { + assert(ShOpc != Instruction::AShr); return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr; }; @@ -796,23 +798,23 @@ Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { // Otherwise, need mask that meets the below requirement. // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask - return ConstantExpr::get( - ShOpc, ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift), - CShift) == CMask; + Constant *MaskInvShift = + ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL); + return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) == + CMask; }; auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * { Constant *CMask, *CShift; Value *X, *Y, *ShiftedX, *Mask, *Shift; if (!match(I.getOperand(ShOpnum), - m_OneUse(m_LogicalShift(m_Value(Y), m_Value(Shift))))) + m_OneUse(m_Shift(m_Value(Y), m_Value(Shift))))) return nullptr; if (!match(I.getOperand(1 - ShOpnum), m_BinOp(m_Value(ShiftedX), m_Value(Mask)))) return nullptr; - if (!match(ShiftedX, - m_OneUse(m_LogicalShift(m_Value(X), m_Specific(Shift))))) + if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift))))) return nullptr; // Make sure we are matching instruction shifts and not ConstantExpr @@ -836,6 +838,18 @@ Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc)) return nullptr; + if (ShOpc == Instruction::AShr) { + if (Instruction::isBitwiseLogicOp(I.getOpcode()) && + BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) { + Value *NotX = Builder.CreateNot(X); + Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX); + return BinaryOperator::Create( + static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift); + } + + return nullptr; + } + // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just // distribute to drop the shift irrelevant of constants. 
if (BinOpc == I.getOpcode() && @@ -857,7 +871,8 @@ Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift)) return nullptr; - Constant *NewCMask = ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift); + Constant *NewCMask = + ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL); Value *NewBinOp2 = Builder.CreateBinOp( static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask); Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2); @@ -924,13 +939,17 @@ InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) { // If the value used in the zext/sext is the select condition, or the negated // of the select condition, the binop can be simplified. - if (CondVal == A) - return SelectInst::Create(CondVal, NewFoldedConst(false, TrueVal), + if (CondVal == A) { + Value *NewTrueVal = NewFoldedConst(false, TrueVal); + return SelectInst::Create(CondVal, NewTrueVal, NewFoldedConst(true, FalseVal)); + } - if (match(A, m_Not(m_Specific(CondVal)))) - return SelectInst::Create(CondVal, NewFoldedConst(true, TrueVal), + if (match(A, m_Not(m_Specific(CondVal)))) { + Value *NewTrueVal = NewFoldedConst(true, TrueVal); + return SelectInst::Create(CondVal, NewTrueVal, NewFoldedConst(false, FalseVal)); + } return nullptr; } @@ -1167,6 +1186,8 @@ void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) { break; case Instruction::Xor: replaceInstUsesWith(cast<Instruction>(*U), I); + // Add to worklist for DCE. + addToWorklist(cast<Instruction>(U)); break; default: llvm_unreachable("Got unexpected user - out of sync with " @@ -1268,7 +1289,7 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC) { Instruction *Clone = I.clone(); Clone->replaceUsesOfWith(SI, NewOp); - IC.InsertNewInstBefore(Clone, *SI); + IC.InsertNewInstBefore(Clone, SI->getIterator()); return Clone; } @@ -1302,6 +1323,21 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, return nullptr; } + // Test if a FCmpInst instruction is used exclusively by a select as + // part of a minimum or maximum operation. If so, refrain from doing + // any other folding. This helps out other analyses which understand + // non-obfuscated minimum and maximum idioms. And in this case, at + // least one of the comparison operands has at least one user besides + // the compare (the select), which would often largely negate the + // benefit of folding anyway. + if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) { + if (CI->hasOneUse()) { + Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) + return nullptr; + } + } + // Make sure that one of the select arms constant folds successfully. Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true); Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false); @@ -1316,6 +1352,47 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } +static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN, + Value *InValue, BasicBlock *InBB, + const DataLayout &DL, + const SimplifyQuery SQ) { + // NB: It is a precondition of this transform that the operands be + // phi translatable! 
This is usually trivially satisfied by limiting it + // to constant ops, and for selects we do a more sophisticated check. + SmallVector<Value *> Ops; + for (Value *Op : I.operands()) { + if (Op == PN) + Ops.push_back(InValue); + else + Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB)); + } + + // Don't consider the simplification successful if we get back a constant + // expression. That's just an instruction in hiding. + // Also reject the case where we simplify back to the phi node. We wouldn't + // be able to remove it in that case. + Value *NewVal = simplifyInstructionWithOperands( + &I, Ops, SQ.getWithInstruction(InBB->getTerminator())); + if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) + return NewVal; + + // Check if incoming PHI value can be replaced with constant + // based on implied condition. + BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator()); + const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I); + if (TerminatorBI && TerminatorBI->isConditional() && + TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) { + bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent(); + std::optional<bool> ImpliedCond = + isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(), + Ops[0], Ops[1], DL, LHSIsTrue); + if (ImpliedCond) + return ConstantInt::getBool(I.getType(), ImpliedCond.value()); + } + + return nullptr; +} + Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { unsigned NumPHIValues = PN->getNumIncomingValues(); if (NumPHIValues == 0) @@ -1344,29 +1421,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { Value *InVal = PN->getIncomingValue(i); BasicBlock *InBB = PN->getIncomingBlock(i); - // NB: It is a precondition of this transform that the operands be - // phi translatable! This is usually trivially satisfied by limiting it - // to constant ops, and for selects we do a more sophisticated check. - SmallVector<Value *> Ops; - for (Value *Op : I.operands()) { - if (Op == PN) - Ops.push_back(InVal); - else - Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB)); - } - - // Don't consider the simplification successful if we get back a constant - // expression. That's just an instruction in hiding. - // Also reject the case where we simplify back to the phi node. We wouldn't - // be able to remove it in that case. - Value *NewVal = simplifyInstructionWithOperands( - &I, Ops, SQ.getWithInstruction(InBB->getTerminator())); - if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) { + if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) { NewPhiValues.push_back(NewVal); continue; } - if (isa<PHINode>(InVal)) return nullptr; // Itself a phi. if (NonSimplifiedBB) return nullptr; // More than one non-simplified value. NonSimplifiedBB = InBB; @@ -1402,7 +1461,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { // Okay, we can do the transformation: create the new PHI node. 
PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues()); - InsertNewInstBefore(NewPN, *PN); + InsertNewInstBefore(NewPN, PN->getIterator()); NewPN->takeName(PN); NewPN->setDebugLoc(PN->getDebugLoc()); @@ -1417,7 +1476,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { else U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB); } - InsertNewInstBefore(Clone, *NonSimplifiedBB->getTerminator()); + InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator()); } for (unsigned i = 0; i != NumPHIValues; ++i) { @@ -1848,8 +1907,8 @@ Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) { Constant *WideC; if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC))) return nullptr; - Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType()); - if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC) + Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc); + if (!NarrowC) return nullptr; Y = NarrowC; } @@ -1940,7 +1999,7 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0); if (NumVarIndices != Src->getNumIndices()) { // FIXME: getIndexedOffsetInType() does not handled scalable vectors. - if (isa<ScalableVectorType>(BaseType)) + if (BaseType->isScalableTy()) return nullptr; SmallVector<Value *> ConstantIndices; @@ -2048,12 +2107,126 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, return nullptr; } +Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, + BuilderTy *Builder, + bool &DoesConsume, unsigned Depth) { + static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1)); + // ~(~(X)) -> X. + Value *A, *B; + if (match(V, m_Not(m_Value(A)))) { + DoesConsume = true; + return A; + } + + Constant *C; + // Constants can be considered to be not'ed values. + if (match(V, m_ImmConstant(C))) + return ConstantExpr::getNot(C); + + if (Depth++ >= MaxAnalysisRecursionDepth) + return nullptr; + + // The rest of the cases require that we invert all uses so don't bother + // doing the analysis if we know we can't use the result. + if (!WillInvertAllUses) + return nullptr; + + // Compares can be inverted if all of their uses are being modified to use + // the ~V. + if (auto *I = dyn_cast<CmpInst>(V)) { + if (Builder != nullptr) + return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0), + I->getOperand(1)); + return NonNull; + } + + // If `V` is of the form `A + B` then `-1 - V` can be folded into + // `(-1 - B) - A` if we are willing to invert all of the uses. + if (match(V, m_Add(m_Value(A), m_Value(B)))) { + if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder, + DoesConsume, Depth)) + return Builder ? Builder->CreateSub(BV, A) : NonNull; + if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, + DoesConsume, Depth)) + return Builder ? Builder->CreateSub(AV, B) : NonNull; + return nullptr; + } + + // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded + // into `A ^ B` if we are willing to invert all of the uses. + if (match(V, m_Xor(m_Value(A), m_Value(B)))) { + if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder, + DoesConsume, Depth)) + return Builder ? Builder->CreateXor(A, BV) : NonNull; + if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, + DoesConsume, Depth)) + return Builder ? 
Builder->CreateXor(AV, B) : NonNull; + return nullptr; + } + + // If `V` is of the form `B - A` then `-1 - V` can be folded into + // `A + (-1 - B)` if we are willing to invert all of the uses. + if (match(V, m_Sub(m_Value(A), m_Value(B)))) { + if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, + DoesConsume, Depth)) + return Builder ? Builder->CreateAdd(AV, B) : NonNull; + return nullptr; + } + + // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded + // into `A s>> B` if we are willing to invert all of the uses. + if (match(V, m_AShr(m_Value(A), m_Value(B)))) { + if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, + DoesConsume, Depth)) + return Builder ? Builder->CreateAShr(AV, B) : NonNull; + return nullptr; + } + + // Treat lshr with non-negative operand as ashr. + if (match(V, m_LShr(m_Value(A), m_Value(B))) && + isKnownNonNegative(A, SQ.getWithInstruction(cast<Instruction>(V)), + Depth)) { + if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, + DoesConsume, Depth)) + return Builder ? Builder->CreateAShr(AV, B) : NonNull; + return nullptr; + } + + Value *Cond; + // LogicOps are special in that we canonicalize them at the cost of an + // instruction. + bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) && + !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V)); + // Selects/min/max with invertible operands are freely invertible + if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) { + if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr, + DoesConsume, Depth)) + return nullptr; + if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, + DoesConsume, Depth)) { + if (Builder != nullptr) { + Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder, + DoesConsume, Depth); + assert(NotB != nullptr && + "Unable to build inverted value for known freely invertable op"); + if (auto *II = dyn_cast<IntrinsicInst>(V)) + return Builder->CreateBinaryIntrinsic( + getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB); + return Builder->CreateSelect(Cond, NotA, NotB); + } + return NonNull; + } + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *PtrOp = GEP.getOperand(0); SmallVector<Value *, 8> Indices(GEP.indices()); Type *GEPType = GEP.getType(); Type *GEPEltType = GEP.getSourceElementType(); - bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType); + bool IsGEPSrcEleScalable = GEPEltType->isScalableTy(); if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(), SQ.getWithInstruction(&GEP))) return replaceInstUsesWith(GEP, V); @@ -2221,7 +2394,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { NewGEP->setOperand(DI, NewPN); } - NewGEP->insertInto(GEP.getParent(), GEP.getParent()->getFirstInsertionPt()); + NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt()); return replaceOperand(GEP, 0, NewGEP); } @@ -2264,11 +2437,43 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType); } } - // We do not handle pointer-vector geps here. if (GEPType->isVectorTy()) return nullptr; + if (GEP.getNumIndices() == 1) { + // Try to replace ADD + GEP with GEP + GEP. 
+ Value *Idx1, *Idx2; + if (match(GEP.getOperand(1), + m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) { + // %idx = add i64 %idx1, %idx2 + // %gep = getelementptr i32, ptr %ptr, i64 %idx + // as: + // %newptr = getelementptr i32, ptr %ptr, i64 %idx1 + // %newgep = getelementptr i32, ptr %newptr, i64 %idx2 + auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(), + GEP.getPointerOperand(), Idx1); + return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr, + Idx2); + } + ConstantInt *C; + if (match(GEP.getOperand(1), m_OneUse(m_SExt(m_OneUse(m_NSWAdd( + m_Value(Idx1), m_ConstantInt(C))))))) { + // %add = add nsw i32 %idx1, idx2 + // %sidx = sext i32 %add to i64 + // %gep = getelementptr i32, ptr %ptr, i64 %sidx + // as: + // %newptr = getelementptr i32, ptr %ptr, i32 %idx1 + // %newgep = getelementptr i32, ptr %newptr, i32 idx2 + auto *NewPtr = Builder.CreateGEP( + GEP.getResultElementType(), GEP.getPointerOperand(), + Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType())); + return GetElementPtrInst::Create( + GEP.getResultElementType(), NewPtr, + Builder.CreateSExt(C, GEP.getOperand(1)->getType())); + } + } + if (!GEP.isInBounds()) { unsigned IdxWidth = DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace()); @@ -2362,6 +2567,26 @@ static bool isAllocSiteRemovable(Instruction *AI, unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0; if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI)) return false; + + // Do not fold compares to aligned_alloc calls, as they may have to + // return null in case the required alignment cannot be satisfied, + // unless we can prove that both alignment and size are valid. + auto AlignmentAndSizeKnownValid = [](CallBase *CB) { + // Check if alignment and size of a call to aligned_alloc is valid, + // that is alignment is a power-of-2 and the size is a multiple of the + // alignment. + const APInt *Alignment; + const APInt *Size; + return match(CB->getArgOperand(0), m_APInt(Alignment)) && + match(CB->getArgOperand(1), m_APInt(Size)) && + Alignment->isPowerOf2() && Size->urem(*Alignment).isZero(); + }; + auto *CB = dyn_cast<CallBase>(AI); + LibFunc TheLibFunc; + if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) && + TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc && + !AlignmentAndSizeKnownValid(CB)) + return false; Users.emplace_back(I); continue; } @@ -2451,9 +2676,10 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { // If we are removing an alloca with a dbg.declare, insert dbg.value calls // before each store. SmallVector<DbgVariableIntrinsic *, 8> DVIs; + SmallVector<DPValue *, 8> DPVs; std::unique_ptr<DIBuilder> DIB; if (isa<AllocaInst>(MI)) { - findDbgUsers(DVIs, &MI); + findDbgUsers(DVIs, &MI, &DPVs); DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); } @@ -2493,6 +2719,9 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { for (auto *DVI : DVIs) if (DVI->isAddressOfVariable()) ConvertDebugDeclareToDebugValue(DVI, SI, *DIB); + for (auto *DPV : DPVs) + if (DPV->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DPV, SI, *DIB); } else { // Casts, GEP, or anything else: we're about to delete this instruction, // so it can not have any valid uses. @@ -2531,9 +2760,15 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { // If there is a dead store to `%a` in @trivially_inlinable_no_op, the // "arg0" dbg.value may be stale after the call. 
  // the DW_OP_deref dbg.value causes large gaps in location coverage.
+  //
+  // FIXME: the Assignment Tracking project has now likely made this
+  // redundant (and it's sometimes harmful).
   for (auto *DVI : DVIs)
     if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
       DVI->eraseFromParent();
+  for (auto *DPV : DPVs)
+    if (DPV->isAddressOfVariable() || DPV->getExpression()->startsWithDeref())
+      DPV->eraseFromParent();
 
   return eraseInstFromFunction(MI);
 }
@@ -2612,7 +2847,7 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
   for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
     if (&Instr == FreeInstrBBTerminator)
       break;
-    Instr.moveBefore(TI);
+    Instr.moveBeforePreserving(TI);
   }
   assert(FreeInstrBB->size() == 1 &&
          "Only the branch instruction should remain");
@@ -2746,55 +2981,77 @@ Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
   return nullptr;
 }
 
+void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
+                                   SmallVectorImpl<BasicBlock *> &Worklist) {
+  if (!DeadEdges.insert({From, To}).second)
+    return;
+
+  // Replace phi node operands in the successor with poison.
+  for (PHINode &PN : To->phis())
+    for (Use &U : PN.incoming_values())
+      if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
+        replaceUse(U, PoisonValue::get(PN.getType()));
+        addToWorklist(&PN);
+        MadeIRChange = true;
+      }
+
+  Worklist.push_back(To);
+}
+
 // Under the assumption that I is unreachable, remove it and following
-// instructions.
-bool InstCombinerImpl::handleUnreachableFrom(Instruction *I) {
-  bool Changed = false;
+// instructions. Changes are reported directly to MadeIRChange.
+void InstCombinerImpl::handleUnreachableFrom(
+    Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
   BasicBlock *BB = I->getParent();
   for (Instruction &Inst : make_early_inc_range(
            make_range(std::next(BB->getTerminator()->getReverseIterator()),
                       std::next(I->getReverseIterator())))) {
     if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
       replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
-      Changed = true;
+      MadeIRChange = true;
     }
     if (Inst.isEHPad() || Inst.getType()->isTokenTy())
       continue;
+    // RemoveDIs: erase debug-info on this instruction manually.
+    Inst.dropDbgValues();
     eraseInstFromFunction(Inst);
-    Changed = true;
+    MadeIRChange = true;
   }
 
-  // Replace phi node operands in successor blocks with poison.
+  // RemoveDIs: to match behaviour in dbg.value mode, drop debug-info on the
+  // terminator too.
+  BB->getTerminator()->dropDbgValues();
+
+  // Handle potentially dead successors.
   for (BasicBlock *Succ : successors(BB))
-    for (PHINode &PN : Succ->phis())
-      for (Use &U : PN.incoming_values())
-        if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
-          replaceUse(U, PoisonValue::get(PN.getType()));
-          addToWorklist(&PN);
-          Changed = true;
-        }
+    addDeadEdge(BB, Succ, Worklist);
+}
 
-  // TODO: Successor blocks may also be dead.
-  return Changed;
+void InstCombinerImpl::handlePotentiallyDeadBlocks(
+    SmallVectorImpl<BasicBlock *> &Worklist) {
+  while (!Worklist.empty()) {
+    BasicBlock *BB = Worklist.pop_back_val();
+    if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
+          return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
+        }))
+      continue;
+
+    handleUnreachableFrom(&BB->front(), Worklist);
+  }
 }
 
-bool InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
+void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
                                                        BasicBlock *LiveSucc) {
-  bool Changed = false;
+  SmallVector<BasicBlock *> Worklist;
   for (BasicBlock *Succ : successors(BB)) {
     // The live successor isn't dead.
     if (Succ == LiveSucc)
       continue;
-    if (!all_of(predecessors(Succ), [&](BasicBlock *Pred) {
-          return DT.dominates(BasicBlockEdge(BB, Succ),
-                              BasicBlockEdge(Pred, Succ));
-        }))
-      continue;
-
-    Changed |= handleUnreachableFrom(&Succ->front());
+    addDeadEdge(BB, Succ, Worklist);
  }
-  return Changed;
+
+  handlePotentiallyDeadBlocks(Worklist);
 }
 
 Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
@@ -2840,14 +3097,17 @@ Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
     return &BI;
   }
 
-  if (isa<UndefValue>(Cond) &&
-      handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr))
-    return &BI;
-  if (auto *CI = dyn_cast<ConstantInt>(Cond))
-    if (handlePotentiallyDeadSuccessors(BI.getParent(),
-                                        BI.getSuccessor(!CI->getZExtValue())))
-      return &BI;
+  if (isa<UndefValue>(Cond)) {
+    handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
+    return nullptr;
+  }
+  if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
+    handlePotentiallyDeadSuccessors(BI.getParent(),
+                                    BI.getSuccessor(!CI->getZExtValue()));
+    return nullptr;
+  }
+
+  DC.registerBranch(&BI);
   return nullptr;
 }
 
@@ -2866,14 +3126,6 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
     return replaceOperand(SI, 0, Op0);
   }
 
-  if (isa<UndefValue>(Cond) &&
-      handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr))
-    return &SI;
-  if (auto *CI = dyn_cast<ConstantInt>(Cond))
-    if (handlePotentiallyDeadSuccessors(
-            SI.getParent(), SI.findCaseValue(CI)->getCaseSuccessor()))
-      return &SI;
-
   KnownBits Known = computeKnownBits(Cond, 0, &SI);
   unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
   unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
@@ -2906,6 +3158,16 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
     return replaceOperand(SI, 0, NewCond);
   }
 
+  if (isa<UndefValue>(Cond)) {
+    handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
+    return nullptr;
+  }
+  if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
+    handlePotentiallyDeadSuccessors(SI.getParent(),
+                                    SI.findCaseValue(CI)->getCaseSuccessor());
+    return nullptr;
+  }
+
   return nullptr;
 }
 
@@ -3532,7 +3794,7 @@ Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
   Value *StartV = StartU->get();
   BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
   bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
-  // We can't insert freeze if the the start value is the result of the
+  // We can't insert freeze if the start value is the result of the
   // terminator (e.g. an invoke).
   if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
     return nullptr;
@@ -3583,19 +3845,27 @@ bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
   // *all* uses if the operand is an invoke/callbr and the use is in a phi on
   // the normal/default destination. This is why the domination check in the
  // replacement below is still necessary.
-  Instruction *MoveBefore;
+  BasicBlock::iterator MoveBefore;
   if (isa<Argument>(Op)) {
     MoveBefore =
-        &*FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
+        FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
   } else {
-    MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
-    if (!MoveBefore)
+    auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
+    if (!MoveBeforeOpt)
       return false;
+    MoveBefore = *MoveBeforeOpt;
   }
 
+  // Don't move to the position of a debug intrinsic.
+  if (isa<DbgInfoIntrinsic>(MoveBefore))
+    MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator();
+  // Re-point the iterator to come after any debug-info records, if we're
+  // running in "RemoveDIs" mode.
+  MoveBefore.setHeadBit(false);
+
   bool Changed = false;
-  if (&FI != MoveBefore) {
-    FI.moveBefore(MoveBefore);
+  if (&FI != &*MoveBefore) {
+    FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
     Changed = true;
   }
 
@@ -3798,7 +4068,7 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
   /// the new position.
 
   BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
-  I->moveBefore(&*InsertPos);
+  I->moveBefore(*DestBlock, InsertPos);
   ++NumSunkInst;
 
   // Also sink all related debug uses from the source basic block. Otherwise we
@@ -3808,10 +4078,19 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
   // here, but that computation has been sunk.
   SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
   findDbgUsers(DbgUsers, I);
-  // Process the sinking DbgUsers in reverse order, as we only want to clone the
-  // last appearing debug intrinsic for each given variable.
+
+  // For all debug values in the destination block, the sunk instruction
+  // will still be available, so they do not need to be dropped.
+  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
+  SmallVector<DPValue *, 2> DPValuesToSalvage;
+  for (auto &DbgUser : DbgUsers)
+    if (DbgUser->getParent() != DestBlock)
+      DbgUsersToSalvage.push_back(DbgUser);
+
+  // Process the sinking DbgUsersToSalvage in reverse order, as we only want
+  // to clone the last appearing debug intrinsic for each given variable.
   SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
-  for (DbgVariableIntrinsic *DVI : DbgUsers)
+  for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
     if (DVI->getParent() == SrcBlock)
       DbgUsersToSink.push_back(DVI);
   llvm::sort(DbgUsersToSink,
@@ -3847,7 +4126,10 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
 
   // Perform salvaging without the clones, then sink the clones.
   if (!DIIClones.empty()) {
-    salvageDebugInfoForDbgValues(*I, DbgUsers);
+    // RemoveDIs: pass in an empty vector of DPValues until we get to
+    // instrumenting this pass.
+    SmallVector<DPValue *, 1> DummyDPValues;
+    salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, DummyDPValues);
     // The clones are in reverse order of original appearance. Reverse again to
     // maintain the original order.
     for (auto &DIIClone : llvm::reverse(DIIClones)) {
@@ -4093,43 +4375,52 @@ public:
   }
 };
 
-/// Populate the IC worklist from a function, by walking it in depth-first
-/// order and adding all reachable code to the worklist.
+/// Populate the IC worklist from a function, by walking it in reverse
+/// post-order and adding all reachable code to the worklist.
 ///
 /// This has a couple of tricks to make the code faster and more powerful. In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
/// them to the worklist (this significantly speeds up instcombine on code where
/// many instructions are dead or constant). Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
-static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
-                                          const TargetLibraryInfo *TLI,
-                                          InstructionWorklist &ICWorklist) {
+bool InstCombinerImpl::prepareWorklist(
+    Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) {
   bool MadeIRChange = false;
-  SmallPtrSet<BasicBlock *, 32> Visited;
-  SmallVector<BasicBlock*, 256> Worklist;
-  Worklist.push_back(&F.front());
-
+  SmallPtrSet<BasicBlock *, 32> LiveBlocks;
   SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
   DenseMap<Constant *, Constant *> FoldedConstants;
   AliasScopeTracker SeenAliasScopes;
 
-  do {
-    BasicBlock *BB = Worklist.pop_back_val();
+  auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
+    for (BasicBlock *Succ : successors(BB))
+      if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
+        for (PHINode &PN : Succ->phis())
+          for (Use &U : PN.incoming_values())
+            if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
+              U.set(PoisonValue::get(PN.getType()));
+              MadeIRChange = true;
+            }
+  };
 
-    // We have now visited this block! If we've already been here, ignore it.
-    if (!Visited.insert(BB).second)
+  for (BasicBlock *BB : RPOT) {
+    if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
+          return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
+        })) {
+      HandleOnlyLiveSuccessor(BB, nullptr);
       continue;
+    }
+    LiveBlocks.insert(BB);
 
     for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
       // ConstantProp instruction if trivially constant.
       if (!Inst.use_empty() &&
          (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
-        if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
+        if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
          LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
                            << '\n');
          Inst.replaceAllUsesWith(C);
          ++NumConstProp;
-          if (isInstructionTriviallyDead(&Inst, TLI))
+          if (isInstructionTriviallyDead(&Inst, &TLI))
            Inst.eraseFromParent();
          MadeIRChange = true;
          continue;
@@ -4143,7 +4434,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
          auto *C = cast<Constant>(U);
          Constant *&FoldRes = FoldedConstants[C];
          if (!FoldRes)
-            FoldRes = ConstantFoldConstant(C, DL, TLI);
+            FoldRes = ConstantFoldConstant(C, DL, &TLI);
 
          if (FoldRes != C) {
            LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
@@ -4163,37 +4454,39 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
      }
    }
 
-    // Recursively visit successors. If this is a branch or switch on a
-    // constant, only visit the reachable successor.
+    // If this is a branch or switch on a constant, mark only the single
+    // live successor. Otherwise assume all successors are live.
     Instruction *TI = BB->getTerminator();
     if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
-      if (isa<UndefValue>(BI->getCondition()))
+      if (isa<UndefValue>(BI->getCondition())) {
         // Branch on undef is UB.
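// Editorial sketch (illustrative IR, not from this patch): given
//   br i1 undef, label %t, label %f
// executing the branch is immediate UB, so no outgoing edge can be taken.
// Passing LiveSucc == nullptr below therefore makes HandleOnlyLiveSuccessor
// record every edge leaving BB as dead and poison the matching phi inputs
// in %t and %f.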
+        HandleOnlyLiveSuccessor(BB, nullptr);
         continue;
+      }
       if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
         bool CondVal = Cond->getZExtValue();
-        BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
-        Worklist.push_back(ReachableBB);
+        HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
         continue;
       }
     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
-      if (isa<UndefValue>(SI->getCondition()))
+      if (isa<UndefValue>(SI->getCondition())) {
         // Switch on undef is UB.
+        HandleOnlyLiveSuccessor(BB, nullptr);
         continue;
+      }
       if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
-        Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
+        HandleOnlyLiveSuccessor(BB,
+                                SI->findCaseValue(Cond)->getCaseSuccessor());
         continue;
       }
     }
-
-    append_range(Worklist, successors(TI));
-  } while (!Worklist.empty());
+  }
 
   // Remove instructions inside unreachable blocks. This prevents the
   // instcombine code from having to deal with some bad special cases, and
   // reduces use counts of instructions.
   for (BasicBlock &BB : F) {
-    if (Visited.count(&BB))
+    if (LiveBlocks.count(&BB))
       continue;
 
     unsigned NumDeadInstInBB;
@@ -4210,11 +4503,11 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
   // of the function down. This jives well with the way that it adds all uses
   // of instructions to the worklist after doing a transformation, thus avoiding
   // some N^2 behavior in pathological cases.
-  ICWorklist.reserve(InstrsForInstructionWorklist.size());
+  Worklist.reserve(InstrsForInstructionWorklist.size());
   for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
     // DCE instruction if trivially dead. As we iterate in reverse program
     // order here, we will clean up whole chains of dead instructions.
-    if (isInstructionTriviallyDead(Inst, TLI) ||
+    if (isInstructionTriviallyDead(Inst, &TLI) ||
         SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
       ++NumDeadInst;
       LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
@@ -4224,7 +4517,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
       continue;
     }
 
-    ICWorklist.push(Inst);
+    Worklist.push(Inst);
   }
 
   return MadeIRChange;
@@ -4234,7 +4527,7 @@ static bool combineInstructionsOverFunction(
     Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
     AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
     DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
-    ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
+    ProfileSummaryInfo *PSI, LoopInfo *LI, const InstCombineOptions &Opts) {
   auto &DL = F.getParent()->getDataLayout();
 
   /// Builder - This is an IRBuilder that automatically inserts new
        AC.registerAssumption(Assume);
      }));
 
+  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
+
   // Lower dbg.declare intrinsics otherwise their value may be clobbered
   // by instcombiner.
   bool MadeIRChange = false;
@@ -4256,35 +4551,33 @@ static bool combineInstructionsOverFunction(
   // Iterate while there is work to do.
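// Editorial summary of the rewritten loop below (hedged against the code,
// not authoritative): each iteration rebuilds the worklist with
// IC.prepareWorklist(F, RPOT) and then runs IC.run(); the loop exits once
// an iteration makes no change. With Opts.VerifyFixpoint set, exceeding
// Opts.MaxIterations becomes a report_fatal_error rather than the quiet
// early break used when fixpoint verification is off.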
   unsigned Iteration = 0;
   while (true) {
-    ++NumWorklistIterations;
     ++Iteration;
 
-    if (Iteration > InfiniteLoopDetectionThreshold) {
-      report_fatal_error(
-          "Instruction Combining seems stuck in an infinite loop after " +
-          Twine(InfiniteLoopDetectionThreshold) + " iterations.");
-    }
-
-    if (Iteration > MaxIterations) {
-      LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
+    if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
+      LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
                         << " on " << F.getName()
-                        << " reached; stopping before reaching a fixpoint\n");
+                        << " reached; stopping without verifying fixpoint\n");
       break;
     }
 
+    ++NumWorklistIterations;
     LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
                       << F.getName() << "\n");
 
-    MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
-
     InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
                         ORE, BFI, PSI, DL, LI);
     IC.MaxArraySizeForCombine = MaxArraySize;
-
-    if (!IC.run())
+    bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
+    MadeChangeInThisIteration |= IC.run();
+    if (!MadeChangeInThisIteration)
       break;
 
     MadeIRChange = true;
+    if (Iteration > Opts.MaxIterations) {
+      report_fatal_error(
+          "Instruction Combining did not reach a fixpoint after " +
+          Twine(Opts.MaxIterations) + " iterations");
+    }
   }
 
   if (Iteration == 1)
@@ -4307,7 +4600,8 @@ void InstCombinePass::printPipeline(
       OS, MapClassName2PassName);
   OS << '<';
   OS << "max-iterations=" << Options.MaxIterations << ";";
-  OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info";
+  OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
+  OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
   OS << '>';
 }
 
@@ -4333,7 +4627,7 @@ PreservedAnalyses InstCombinePass::run(Function &F,
                            &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
 
   if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
-                                       BFI, PSI, Options.MaxIterations, LI))
+                                       BFI, PSI, LI, Options))
     // No changes, all analyses are preserved.
    return PreservedAnalyses::all();
 
@@ -4382,8 +4676,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
       nullptr;
 
   return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
-                                         BFI, PSI,
-                                         InstCombineDefaultMaxIterations, LI);
+                                         BFI, PSI, LI, InstCombineOptions());
 }
 
 char InstructionCombiningPass::ID = 0;
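A minimal usage sketch (editorial; assumes an opt binary built from a tree containing this change, and a placeholder input file in.ll): the options emitted by printPipeline above can be passed back in through a textual pass pipeline, e.g.

  opt -passes='instcombine<max-iterations=2;no-verify-fixpoint>' -S in.ll

which allows at most two iterations and stops quietly, whereas the same pipeline with 'verify-fixpoint' aborts via report_fatal_error ("Instruction Combining did not reach a fixpoint after ... iterations") if the limit is exceeded.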