author     Dimitry Andric <dim@FreeBSD.org>    2019-01-19 10:01:25 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2019-01-19 10:01:25 +0000
commit     d8e91e46262bc44006913e6796843909f1ac7bcd (patch)
tree       7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Transforms/InstCombine/InstCombineCompares.cpp
parent     b7eb8e35e481a74962664b63dfb09483b200209a (diff)
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineCompares.cpp')
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 669
1 file changed, 459 insertions, 210 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6de92a4842ab..b5bbb09935e2 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -522,11 +522,9 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
}
// Otherwise, there is an index. The computation we will do will be modulo
- // the pointer size, so get it.
- uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
- Offset &= PtrSizeMask;
- VariableScale &= PtrSizeMask;
+ // the pointer size.
+ Offset = SignExtend64(Offset, IntPtrWidth);
+ VariableScale = SignExtend64(VariableScale, IntPtrWidth);
// To do this transformation, any constant index must be a multiple of the
// variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
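The identity relied on here: the GEP offset arithmetic is only meaningful modulo 2^IntPtrWidth, and sign extension (rather than masking) preserves negative offsets. A minimal standalone C++ sketch, with a hypothetical signExtend64 stand-in for llvm::SignExtend64:

    #include <cassert>
    #include <cstdint>

    // Stand-in for llvm::SignExtend64: keep the low 'Bits' bits of 'X' and
    // sign-extend them to 64 bits (assumes 0 < Bits < 64 and the usual
    // two's-complement arithmetic shift behavior).
    static int64_t signExtend64(uint64_t X, unsigned Bits) {
      return int64_t(X << (64 - Bits)) >> (64 - Bits);
    }

    int main() {
      // With a 32-bit pointer, the offset 0xFFFFFFFF really means -1.
      assert(signExtend64(0xFFFFFFFFULL, 32) == -1);
      assert(signExtend64(12, 32) == 12);
      // The old masking ('Offset &= PtrSizeMask') kept it as 4294967295,
      // which breaks the divisibility reasoning below for negative offsets.
      assert((0xFFFFFFFFULL & 0xFFFFFFFFULL) == 4294967295ULL);
      return 0;
    }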
@@ -909,7 +907,8 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
// If all indices are the same, just compare the base pointers.
- if (IndicesTheSame)
+ Type *BaseType = GEPLHS->getOperand(0)->getType();
+ if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
// If we're comparing GEPs with two base pointers that only differ in type
@@ -976,7 +975,7 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
if (NumDifferences == 0) // SAME GEP?
return replaceInstUsesWith(I, // No comparison is needed here.
- Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond)));
+ ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1 && GEPsInBounds) {
Value *LHSV = GEPLHS->getOperand(DiffOperand);
@@ -1079,19 +1078,20 @@ Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
}
-/// Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
+/// Fold "icmp pred (X+C), X".
+Instruction *InstCombiner::foldICmpAddOpConst(Value *X, const APInt &C,
ICmpInst::Predicate Pred) {
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
+ assert(!!C && "C should not be zero!");
// (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
// (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
// (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
- Value *R =
- ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+ Constant *R = ConstantInt::get(X->getType(),
+ APInt::getMaxValue(C.getBitWidth()) - C);
return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
}
@@ -1099,11 +1099,10 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
// (X+2) >u X --> X <u (0-2) --> X <u 254
// (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
- return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+ return new ICmpInst(ICmpInst::ICMP_ULT, X,
+ ConstantInt::get(X->getType(), -C));
- unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
- ConstantInt *SMax = ConstantInt::get(X->getContext(),
- APInt::getSignedMaxValue(BitWidth));
+ APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());
// (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
// (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
@@ -1112,7 +1111,8 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
// (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
// (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
- return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+ return new ICmpInst(ICmpInst::ICMP_SGT, X,
+ ConstantInt::get(X->getType(), SMax - C));
// (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
// (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
@@ -1122,8 +1122,8 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
- Constant *C = Builder.getInt(CI->getValue() - 1);
- return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+ return new ICmpInst(ICmpInst::ICMP_SLT, X,
+ ConstantInt::get(X->getType(), SMax - (C - 1)));
}
/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
@@ -1333,17 +1333,12 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
return nullptr;
}
-// Fold icmp Pred X, C.
+/// Fold icmp Pred X, C.
+/// TODO: This code structure does not make sense. The saturating add fold
+/// should be moved to some other helper and extended as noted below (it is also
+/// possible that code has been made unnecessary - do we canonicalize IR to
+/// overflow/saturating intrinsics or not?).
Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
- CmpInst::Predicate Pred = Cmp.getPredicate();
- Value *X = Cmp.getOperand(0);
-
- const APInt *C;
- if (!match(Cmp.getOperand(1), m_APInt(C)))
- return nullptr;
-
- Value *A = nullptr, *B = nullptr;
-
// Match the following pattern, which is a common idiom when writing
// overflow-safe integer arithmetic functions. The source performs an addition
// in wider type and explicitly checks for overflow using comparisons against
@@ -1355,37 +1350,62 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
//
// sum = a + b
// if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
- {
- ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
- if (Pred == ICmpInst::ICMP_UGT &&
- match(X, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
- if (Instruction *Res = processUGT_ADDCST_ADD(
- Cmp, A, B, CI2, cast<ConstantInt>(Cmp.getOperand(1)), *this))
- return Res;
- }
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1);
+ Value *A, *B;
+ ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) &&
+ match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this))
+ return Res;
+
+ return nullptr;
+}
- // FIXME: Use m_APInt to allow folds for splat constants.
- ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1));
- if (!CI)
+/// Canonicalize icmp instructions based on dominating conditions.
+Instruction *InstCombiner::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
+ // This is a cheap/incomplete check for dominance - just match a single
+ // predecessor with a conditional branch.
+ BasicBlock *CmpBB = Cmp.getParent();
+ BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+ if (!DomBB)
return nullptr;
- // Canonicalize icmp instructions based on dominating conditions.
- BasicBlock *Parent = Cmp.getParent();
- BasicBlock *Dom = Parent->getSinglePredecessor();
- auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
- ICmpInst::Predicate Pred2;
+ Value *DomCond;
BasicBlock *TrueBB, *FalseBB;
- ConstantInt *CI2;
- if (BI && match(BI, m_Br(m_ICmp(Pred2, m_Specific(X), m_ConstantInt(CI2)),
- TrueBB, FalseBB)) &&
- TrueBB != FalseBB) {
- ConstantRange CR =
- ConstantRange::makeAllowedICmpRegion(Pred, CI->getValue());
+ if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+ return nullptr;
+
+ assert((TrueBB == CmpBB || FalseBB == CmpBB) &&
+ "Predecessor block does not point to successor?");
+
+ // The branch should get simplified. Don't bother simplifying this condition.
+ if (TrueBB == FalseBB)
+ return nullptr;
+
+ // Try to simplify this compare to T/F based on the dominating condition.
+ Optional<bool> Imp = isImpliedCondition(DomCond, &Cmp, DL, TrueBB == CmpBB);
+ if (Imp)
+ return replaceInstUsesWith(Cmp, ConstantInt::get(Cmp.getType(), *Imp));
+
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
+ ICmpInst::Predicate DomPred;
+ const APInt *C, *DomC;
+ if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))) &&
+ match(Y, m_APInt(C))) {
+ // We have 2 compares of a variable with constants. Calculate the constant
+ // ranges of those compares to see if we can transform the 2nd compare:
+ // DomBB:
+ // DomCond = icmp DomPred X, DomC
+ // br DomCond, CmpBB, FalseBB
+ // CmpBB:
+ // Cmp = icmp Pred X, C
+ ConstantRange CR = ConstantRange::makeAllowedICmpRegion(Pred, *C);
ConstantRange DominatingCR =
- (Parent == TrueBB)
- ? ConstantRange::makeExactICmpRegion(Pred2, CI2->getValue())
- : ConstantRange::makeExactICmpRegion(
- CmpInst::getInversePredicate(Pred2), CI2->getValue());
+ (CmpBB == TrueBB) ? ConstantRange::makeExactICmpRegion(DomPred, *DomC)
+ : ConstantRange::makeExactICmpRegion(
+ CmpInst::getInversePredicate(DomPred), *DomC);
ConstantRange Intersection = DominatingCR.intersectWith(CR);
ConstantRange Difference = DominatingCR.difference(CR);
if (Intersection.isEmptySet())
@@ -1393,23 +1413,20 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
if (Difference.isEmptySet())
return replaceInstUsesWith(Cmp, Builder.getTrue());
- // If this is a normal comparison, it demands all bits. If it is a sign
- // bit comparison, it only demands the sign bit.
- bool UnusedBit;
- bool IsSignBit = isSignBitCheck(Pred, CI->getValue(), UnusedBit);
-
// Canonicalizing a sign bit comparison that gets used in a branch,
// pessimizes codegen by generating branch on zero instruction instead
// of a test and branch. So we avoid canonicalizing in such situations
// because test and branch instruction has better branch displacement
// than compare and branch instruction.
+ bool UnusedBit;
+ bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
return nullptr;
- if (auto *AI = Intersection.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI));
- if (auto *AD = Difference.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD));
+ if (const APInt *EqC = Intersection.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
+ if (const APInt *NeC = Difference.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
}
return nullptr;
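The intersection/difference logic can be illustrated with concrete numbers. A minimal standalone sketch over i8, assuming a hypothetical dominating condition X u< 10:

    #include <cassert>
    #include <cstdint>

    // If the single predecessor branches on "X u< 10", then inside CmpBB
    // "X u< 20" is always true (its range difference with the dominating
    // range is empty) and "X u> 50" is always false (the intersection is
    // empty), which is exactly what the ConstantRange tests conclude.
    int main() {
      for (unsigned x = 0; x < 256; ++x) {
        uint8_t X = uint8_t(x);
        if (!(X < 10))      // DomCond: only enter CmpBB when X u< 10 holds
          continue;
        assert(X < 20);     // Difference empty   -> fold Cmp to 'true'
        assert(!(X > 50));  // Intersection empty -> fold Cmp to 'false'
      }
      return 0;
    }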
@@ -1498,16 +1515,25 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
}
}
- // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
- // iff -C is a power of 2
- if (Pred == ICmpInst::ICMP_UGT && *XorC == ~C && (C + 1).isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
-
- // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
- // iff -C is a power of 2
- if (Pred == ICmpInst::ICMP_ULT && *XorC == -C && C.isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
-
+ // Mask constant magic can eliminate an 'xor' with unsigned compares.
+ if (Pred == ICmpInst::ICMP_UGT) {
+ // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
+ if (*XorC == ~C && (C + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
+ // (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
+ if (*XorC == C && (C + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
+ }
+ if (Pred == ICmpInst::ICMP_ULT) {
+ // (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
+ if (*XorC == -C && C.isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGT, X,
+ ConstantInt::get(X->getType(), ~C));
+ // (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
+ if (*XorC == C && (-C).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGT, X,
+ ConstantInt::get(X->getType(), ~C));
+ }
return nullptr;
}
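Each of the four xor-with-mask rewrites is an exhaustively checkable identity at i8. A standalone sketch (hypothetical harness) that brute-forces them under the stated power-of-two side conditions:

    #include <cassert>
    #include <cstdint>

    static bool isPow2(uint8_t v) { return v && !(v & (v - 1)); }

    int main() {
      for (unsigned c = 0; c < 256; ++c) {
        for (unsigned x = 0; x < 256; ++x) {
          uint8_t C = uint8_t(c), X = uint8_t(x), NotC = uint8_t(~C);
          if (isPow2(uint8_t(C + 1))) {
            // (X ^ ~C) >u C  <=>  X <u ~C
            assert(((uint8_t)(X ^ NotC) > C) == (X < NotC));
            // (X ^ C) >u C   <=>  X >u C
            assert(((uint8_t)(X ^ C) > C) == (X > C));
          }
          if (isPow2(C)) {
            // (X ^ -C) <u C  <=>  X >u ~C
            assert(((uint8_t)(X ^ (uint8_t)-C) < C) == (X > NotC));
          }
          if (isPow2(uint8_t(-C))) {
            // (X ^ C) <u C   <=>  X >u ~C
            assert(((uint8_t)(X ^ C) < C) == (X > NotC));
          }
        }
      }
      return 0;
    }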
@@ -1598,6 +1624,13 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
BinaryOperator *And,
const APInt &C1) {
+ // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
+ // TODO: We canonicalize to the longer form for scalars because we have
+ // better analysis/folds for icmp, and codegen may be better with icmp.
+ if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() &&
+ C1.isNullValue() && match(And->getOperand(1), m_One()))
+ return new TruncInst(And->getOperand(0), Cmp.getType());
+
const APInt *C2;
if (!match(And->getOperand(1), m_APInt(C2)))
return nullptr;
@@ -2336,13 +2369,19 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
Type *Ty = Add->getType();
CmpInst::Predicate Pred = Cmp.getPredicate();
+ if (!Add->hasOneUse())
+ return nullptr;
+
// If the add does not wrap, we can always adjust the compare by subtracting
- // the constants. Equality comparisons are handled elsewhere. SGE/SLE are
- // canonicalized to SGT/SLT.
- if (Add->hasNoSignedWrap() &&
- (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) {
+ // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
+ // are canonicalized to SGT/SLT/UGT/ULT.
+ if ((Add->hasNoSignedWrap() &&
+ (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
+ (Add->hasNoUnsignedWrap() &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
bool Overflow;
- APInt NewC = C.ssub_ov(*C2, Overflow);
+ APInt NewC =
+ Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow);
// If there is overflow, the result must be true or false.
// TODO: Can we assert there is no overflow because InstSimplify always
// handles those cases?
@@ -2366,9 +2405,6 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
}
- if (!Add->hasOneUse())
- return nullptr;
-
// X+C <u C2 -> (X & -C2) == C
// iff C & (C2-1) == 0
// C2 is a power of 2
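The no-unsigned-wrap adjustment added in this hunk rests on a simple identity. A standalone i8 brute force (hypothetical harness), restricted to non-wrapping additions to mirror the nuw requirement:

    #include <cassert>
    #include <cstdint>

    // When X + C2 does not wrap (nuw):
    //   (X + C2) u< C  <=>  X u< (C - C2)   (always false if C - C2 underflows)
    //   (X + C2) u> C  <=>  X u> (C - C2)   (always true  if C - C2 underflows)
    int main() {
      for (unsigned c2 = 0; c2 < 256; ++c2)
        for (unsigned c = 0; c < 256; ++c)
          for (unsigned x = 0; x + c2 < 256; ++x) {   // nuw additions only
            bool Underflow = c < c2;
            bool LT = (x + c2) < c;
            bool GT = (x + c2) > c;
            assert(LT == (Underflow ? false : x < (c - c2)));
            assert(GT == (Underflow ? true  : x > (c - c2)));
          }
      return 0;
    }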
@@ -2729,6 +2765,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
// Handle icmp {eq|ne} <intrinsic>, Constant.
Type *Ty = II->getType();
+ unsigned BitWidth = C.getBitWidth();
switch (II->getIntrinsicID()) {
case Intrinsic::bswap:
Worklist.Add(II);
@@ -2737,21 +2774,39 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
return &Cmp;
case Intrinsic::ctlz:
- case Intrinsic::cttz:
+ case Intrinsic::cttz: {
// ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
- if (C == C.getBitWidth()) {
+ if (C == BitWidth) {
Worklist.Add(II);
Cmp.setOperand(0, II->getArgOperand(0));
Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
return &Cmp;
}
+
+ // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
+ // and Mask1 has the low C+1 bits (0..C) set. Similar for ctlz, but for
+ // high bits.
+ // Limit to one use to ensure we don't increase instruction count.
+ unsigned Num = C.getLimitedValue(BitWidth);
+ if (Num != BitWidth && II->hasOneUse()) {
+ bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
+ APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
+ : APInt::getHighBitsSet(BitWidth, Num + 1);
+ APInt Mask2 = IsTrailing
+ ? APInt::getOneBitSet(BitWidth, Num)
+ : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
+ Cmp.setOperand(0, Builder.CreateAnd(II->getArgOperand(0), Mask1));
+ Cmp.setOperand(1, ConstantInt::get(Ty, Mask2));
+ Worklist.Add(II);
+ return &Cmp;
+ }
break;
+ }
case Intrinsic::ctpop: {
// popcount(A) == 0 -> A == 0 and likewise for !=
// popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
bool IsZero = C.isNullValue();
- if (IsZero || C == C.getBitWidth()) {
+ if (IsZero || C == BitWidth) {
Worklist.Add(II);
Cmp.setOperand(0, II->getArgOperand(0));
auto *NewOp =
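The cttz/ctlz equality fold added above reduces to a mask identity that can be brute-forced at i8. A standalone sketch with hand-rolled cttz8/ctlz8 helpers (hypothetical names):

    #include <cassert>
    #include <cstdint>

    // i8 count-trailing/leading-zeros (0 maps to 8), used to check:
    //   cttz(A) == N  <=>  (A & low_bits(N+1)) == (1 << N)        for N < 8
    //   ctlz(A) == N  <=>  (A & high_bits(N+1)) == (1 << (7 - N)) for N < 8
    static unsigned cttz8(uint8_t A) {
      unsigned N = 0;
      while (N < 8 && !(A & (1u << N))) ++N;
      return N;
    }
    static unsigned ctlz8(uint8_t A) {
      unsigned N = 0;
      while (N < 8 && !(A & (0x80u >> N))) ++N;
      return N;
    }

    int main() {
      for (unsigned a = 0; a < 256; ++a) {
        uint8_t A = uint8_t(a);
        for (unsigned N = 0; N < 8; ++N) {
          uint8_t LowMask  = uint8_t((1u << (N + 1)) - 1);  // bits 0..N
          uint8_t HighMask = uint8_t(0xFF00u >> (N + 1));   // bits 7..7-N
          assert((cttz8(A) == N) == ((A & LowMask)  == (1u << N)));
          assert((ctlz8(A) == N) == ((A & HighMask) == (0x80u >> N)));
        }
      }
      return 0;
    }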
@@ -2870,15 +2925,25 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
/// In this case, we are looking for comparisons that look like
/// a check for a lossy truncation.
/// Folds:
-/// x & (-1 >> y) SrcPred x to x DstPred (-1 >> y)
+/// icmp SrcPred (x & Mask), x to icmp DstPred x, Mask
+/// Where Mask is some pattern that produces all-ones in low bits:
+/// (-1 >> y)
+/// ((-1 << y) >> y) <- non-canonical, has extra uses
+/// ~(-1 << y)
+/// ((1 << y) + (-1)) <- non-canonical, has extra uses
/// The Mask can be a constant, too.
/// For some predicates, the operands are commutative.
/// For others, x can only be on a specific side.
static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
InstCombiner::BuilderTy &Builder) {
ICmpInst::Predicate SrcPred;
- Value *X, *M;
- auto m_Mask = m_CombineOr(m_LShr(m_AllOnes(), m_Value()), m_LowBitMask());
+ Value *X, *M, *Y;
+ auto m_VariableMask = m_CombineOr(
+ m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
+ m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
+ m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
+ m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
+ auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
if (!match(&I, m_c_ICmp(SrcPred,
m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
m_Deferred(X))))
@@ -2924,12 +2989,20 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
// x & (-1 >> y) s>= x -> x s<= (-1 >> y)
if (X != I.getOperand(1)) // X must be on RHS of comparison!
return nullptr; // Ignore the other case.
+ if (!match(M, m_Constant())) // Can not do this fold with non-constant.
+ return nullptr;
+ if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+ return nullptr;
DstPred = ICmpInst::Predicate::ICMP_SLE;
break;
case ICmpInst::Predicate::ICMP_SLT:
// x & (-1 >> y) s< x -> x s> (-1 >> y)
if (X != I.getOperand(1)) // X must be on RHS of comparison!
return nullptr; // Ignore the other case.
+ if (!match(M, m_Constant())) // Can not do this fold with non-constant.
+ return nullptr;
+ if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+ return nullptr;
DstPred = ICmpInst::Predicate::ICMP_SGT;
break;
case ICmpInst::Predicate::ICMP_SLE:
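The signed variants above only hold when the mask is a constant, non-negative, low-bit pattern, which is why the extra m_Constant/m_NonNegative guards are added. A standalone i8 brute force (hypothetical harness) over exactly those masks:

    #include <cassert>
    #include <cstdint>

    // For constant low-bit masks M in {0, 1, 3, ..., 127} (the all-ones mask
    // is excluded because it is negative as i8):
    //   (X & M) s<  X  <=>  X s>  M
    //   (X & M) s>= X  <=>  X s<= M
    int main() {
      for (int m = 0; m <= 127; m = 2 * m + 1) {
        for (int x = -128; x <= 127; ++x) {
          int8_t M = int8_t(m), X = int8_t(x);
          int8_t Masked = int8_t(X & M);
          assert((Masked <  X) == (X >  M));
          assert((Masked >= X) == (X <= M));
        }
      }
      return 0;
    }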
@@ -3034,6 +3107,18 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
return nullptr;
const CmpInst::Predicate Pred = I.getPredicate();
+ Value *X;
+
+ // Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
+ // (Op1 + X) <u Op1 --> ~Op1 <u X
+ // Op0 >u (Op0 + X) --> X >u ~Op0
+ if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
+ Pred == ICmpInst::ICMP_ULT)
+ return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
+ if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
+ Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
+
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
if (BO0 && isa<OverflowingBinaryOperator>(BO0))
NoOp0WrapProblem =
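The 'not' rewrite for unsigned-overflow checks is the familiar identity that an addition wraps exactly when one operand exceeds the space left above the other. A standalone i8 brute force (hypothetical harness):

    #include <cassert>
    #include <cstdint>

    // (A + X) u< A  <=>  ~A u< X
    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned x = 0; x < 256; ++x) {
          uint8_t A = uint8_t(a), X = uint8_t(x);
          assert((uint8_t(A + X) < A) == (uint8_t(~A) < X));
        }
      return 0;
    }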
@@ -4598,6 +4683,83 @@ static Instruction *canonicalizeICmpBool(ICmpInst &I,
}
}
+// Transform pattern like:
+// (1 << Y) u<= X or ~(-1 << Y) u< X or ((1 << Y)+(-1)) u< X
+// (1 << Y) u> X or ~(-1 << Y) u>= X or ((1 << Y)+(-1)) u>= X
+// Into:
+// (X l>> Y) != 0
+// (X l>> Y) == 0
+static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ ICmpInst::Predicate Pred, NewPred;
+ Value *X, *Y;
+ if (match(&Cmp,
+ m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) {
+ // We want X to be the icmp's second operand, so swap predicate if it isn't.
+ if (Cmp.getOperand(0) == X)
+ Pred = Cmp.getSwappedPredicate();
+
+ switch (Pred) {
+ case ICmpInst::ICMP_ULE:
+ NewPred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ NewPred = ICmpInst::ICMP_EQ;
+ break;
+ default:
+ return nullptr;
+ }
+ } else if (match(&Cmp, m_c_ICmp(Pred,
+ m_OneUse(m_CombineOr(
+ m_Not(m_Shl(m_AllOnes(), m_Value(Y))),
+ m_Add(m_Shl(m_One(), m_Value(Y)),
+ m_AllOnes()))),
+ m_Value(X)))) {
+ // The variant with 'add' is not canonical (the variant with 'not' is);
+ // we only get it here because it has extra uses and can't be canonicalized.
+
+ // We want X to be the icmp's second operand, so swap predicate if it isn't.
+ if (Cmp.getOperand(0) == X)
+ Pred = Cmp.getSwappedPredicate();
+
+ switch (Pred) {
+ case ICmpInst::ICMP_ULT:
+ NewPred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ NewPred = ICmpInst::ICMP_EQ;
+ break;
+ default:
+ return nullptr;
+ }
+ } else
+ return nullptr;
+
+ Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits");
+ Constant *Zero = Constant::getNullValue(NewX->getType());
+ return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero);
+}
+
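Both matched mask forms reduce to the same shift-based test, which can be verified exhaustively at i8. A standalone sketch (hypothetical harness):

    #include <cassert>
    #include <cstdint>

    // For every X and every shift amount Y < 8:
    //   (1 << Y)       u<= X  <=>  (X l>> Y) != 0
    //   ((1 << Y) - 1) u<  X  <=>  (X l>> Y) != 0
    int main() {
      for (unsigned y = 0; y < 8; ++y)
        for (unsigned x = 0; x < 256; ++x) {
          uint8_t X = uint8_t(x);
          uint8_t Pow = uint8_t(1u << y);
          assert((Pow <= X)                == ((X >> y) != 0));
          assert(((uint8_t)(Pow - 1) < X)  == ((X >> y) != 0));
        }
      return 0;
    }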
+static Instruction *foldVectorCmp(CmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ // If both arguments of the cmp are shuffles that use the same mask and
+ // shuffle within a single vector, move the shuffle after the cmp.
+ Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
+ Value *V1, *V2;
+ Constant *M;
+ if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(M))) &&
+ match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(M))) &&
+ V1->getType() == V2->getType() &&
+ (LHS->hasOneUse() || RHS->hasOneUse())) {
+ // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M
+ CmpInst::Predicate P = Cmp.getPredicate();
+ Value *NewCmp = isa<ICmpInst>(Cmp) ? Builder.CreateICmp(P, V1, V2)
+ : Builder.CreateFCmp(P, V1, V2);
+ return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M);
+ }
+ return nullptr;
+}
+
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -4645,6 +4807,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpWithConstant(I))
return Res;
+ if (Instruction *Res = foldICmpWithDominatingICmp(I))
+ return Res;
+
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
@@ -4857,16 +5022,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return ExtractValueInst::Create(ACXI, 1);
{
- Value *X; ConstantInt *Cst;
+ Value *X;
+ const APInt *C;
// icmp X+Cst, X
- if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
- return foldICmpAddOpConst(X, Cst, I.getPredicate());
+ if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
+ return foldICmpAddOpConst(X, *C, I.getPredicate());
// icmp X, X+Cst
- if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
- return foldICmpAddOpConst(X, Cst, I.getSwappedPredicate());
+ if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
+ return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
}
+ if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
+ return Res;
+
+ if (I.getType()->isVectorTy())
+ if (Instruction *Res = foldVectorCmp(I, Builder))
+ return Res;
+
return Changed ? &I : nullptr;
}
@@ -5109,6 +5282,117 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
}
+/// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary.
+static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC) {
+ // When C is not 0.0 and infinities are not allowed:
+ // (C / X) < 0.0 is a sign-bit test of X
+ // (C / X) < 0.0 --> X < 0.0 (if C is positive)
+ // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate)
+ //
+ // Proof:
+ // Multiply (C / X) < 0.0 by X * X / C.
+ // - X is non-zero; if it were zero, the 'ninf' flag would be violated.
+ // - C defines the sign of X * X / C. Thus it also defines whether to swap
+ // the predicate. C is also non zero by definition.
+ //
+ // Thus X * X / C is non zero and the transformation is valid. [qed]
+
+ FCmpInst::Predicate Pred = I.getPredicate();
+
+ // Check that predicates are valid.
+ if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
+ (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
+ return nullptr;
+
+ // Check that RHS operand is zero.
+ if (!match(RHSC, m_AnyZeroFP()))
+ return nullptr;
+
+ // Check fastmath flags ('ninf').
+ if (!LHSI->hasNoInfs() || !I.hasNoInfs())
+ return nullptr;
+
+ // Check the properties of the dividend. It must not be zero to avoid a
+ // division by zero (see Proof).
+ const APFloat *C;
+ if (!match(LHSI->getOperand(0), m_APFloat(C)))
+ return nullptr;
+
+ if (C->isZero())
+ return nullptr;
+
+ // Get swapped predicate if necessary.
+ if (C->isNegative())
+ Pred = I.getSwappedPredicate();
+
+ return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I);
+}
+
+/// Optimize fabs(X) compared with zero.
+static Instruction *foldFabsWithFcmpZero(FCmpInst &I) {
+ Value *X;
+ if (!match(I.getOperand(0), m_Intrinsic<Intrinsic::fabs>(m_Value(X))) ||
+ !match(I.getOperand(1), m_PosZeroFP()))
+ return nullptr;
+
+ auto replacePredAndOp0 = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
+ I->setPredicate(P);
+ I->setOperand(0, X);
+ return I;
+ };
+
+ switch (I.getPredicate()) {
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OLT:
+ // fabs(X) >= 0.0 --> true
+ // fabs(X) < 0.0 --> false
+ llvm_unreachable("fcmp should have simplified");
+
+ case FCmpInst::FCMP_OGT:
+ // fabs(X) > 0.0 --> X != 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_ONE, X);
+
+ case FCmpInst::FCMP_UGT:
+ // fabs(X) u> 0.0 --> X u!= 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_UNE, X);
+
+ case FCmpInst::FCMP_OLE:
+ // fabs(X) <= 0.0 --> X == 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_OEQ, X);
+
+ case FCmpInst::FCMP_ULE:
+ // fabs(X) u<= 0.0 --> X u== 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_UEQ, X);
+
+ case FCmpInst::FCMP_OGE:
+ // fabs(X) >= 0.0 --> !isnan(X)
+ assert(!I.hasNoNaNs() && "fcmp should have simplified");
+ return replacePredAndOp0(&I, FCmpInst::FCMP_ORD, X);
+
+ case FCmpInst::FCMP_ULT:
+ // fabs(X) u< 0.0 --> isnan(X)
+ assert(!I.hasNoNaNs() && "fcmp should have simplified");
+ return replacePredAndOp0(&I, FCmpInst::FCMP_UNO, X);
+
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ORD:
+ case FCmpInst::FCMP_UNO:
+ // Look through the fabs() because it doesn't change anything but the sign.
+ // fabs(X) == 0.0 --> X == 0.0,
+ // fabs(X) != 0.0 --> X != 0.0
+ // isnan(fabs(X)) --> isnan(X)
+ // !isnan(fabs(X)) --> !isnan(X)
+ return replacePredAndOp0(&I, I.getPredicate(), X);
+
+ default:
+ return nullptr;
+ }
+}
+
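The fabs-versus-zero rewrites can be spot-checked with explicit ordered/unordered predicate semantics; plain C++ comparison operators only model the ordered predicates, so the sketch (hypothetical helper names) spells them out:

    #include <cassert>
    #include <cmath>
    #include <limits>

    // Checked equivalences:
    //   fabs(X) ogt 0.0  <=>  X one 0.0
    //   fabs(X) ole 0.0  <=>  X oeq 0.0
    //   fabs(X) oge 0.0  <=>  ord(X)   (i.e. !isnan(X))
    //   fabs(X) ult 0.0  <=>  uno(X)   (i.e.  isnan(X))
    static bool ord(double A, double B) { return !std::isnan(A) && !std::isnan(B); }

    int main() {
      const double Samples[] = {0.0, -0.0, 1.5, -1.5,
                                std::numeric_limits<double>::denorm_min(),
                                -std::numeric_limits<double>::max(),
                                std::numeric_limits<double>::quiet_NaN()};
      for (double X : Samples) {
        double F = std::fabs(X);
        assert((ord(F, 0.0) && F > 0.0)  == (ord(X, 0.0) && X != 0.0)); // ogt -> one
        assert((ord(F, 0.0) && F <= 0.0) == (ord(X, 0.0) && X == 0.0)); // ole -> oeq
        assert((ord(F, 0.0) && F >= 0.0) == ord(X, 0.0));               // oge -> ord
        assert((!ord(F, 0.0) || F < 0.0) == !ord(X, 0.0));              // ult -> uno
      }
      return 0;
    }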
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
bool Changed = false;
@@ -5153,11 +5437,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
// then canonicalize the operand to 0.0.
if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
- if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0)) {
+ if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI)) {
I.setOperand(0, ConstantFP::getNullValue(Op0->getType()));
return &I;
}
- if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1)) {
+ if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI)) {
I.setOperand(1, ConstantFP::getNullValue(Op0->getType()));
return &I;
}
@@ -5178,128 +5462,93 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return nullptr;
}
- // Handle fcmp with constant RHS
- if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
- switch (LHSI->getOpcode()) {
- case Instruction::FPExt: {
- // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
- FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
- ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
- if (!RHSF)
- break;
-
- const fltSemantics *Sem;
- // FIXME: This shouldn't be here.
- if (LHSExt->getSrcTy()->isHalfTy())
- Sem = &APFloat::IEEEhalf();
- else if (LHSExt->getSrcTy()->isFloatTy())
- Sem = &APFloat::IEEEsingle();
- else if (LHSExt->getSrcTy()->isDoubleTy())
- Sem = &APFloat::IEEEdouble();
- else if (LHSExt->getSrcTy()->isFP128Ty())
- Sem = &APFloat::IEEEquad();
- else if (LHSExt->getSrcTy()->isX86_FP80Ty())
- Sem = &APFloat::x87DoubleExtended();
- else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
- Sem = &APFloat::PPCDoubleDouble();
- else
- break;
-
- bool Lossy;
- APFloat F = RHSF->getValueAPF();
- F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
-
- // Avoid lossy conversions and denormals. Zero is a special case
- // that's OK to convert.
- APFloat Fabs = F;
- Fabs.clearSign();
- if (!Lossy &&
- ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
- APFloat::cmpLessThan) || Fabs.isZero()))
-
- return new FCmpInst(Pred, LHSExt->getOperand(0),
- ConstantFP::get(RHSC->getContext(), F));
- break;
- }
- case Instruction::PHI:
- // Only fold fcmp into the PHI if the phi and fcmp are in the same
- // block. If in the same block, we're encouraging jump threading. If
- // not, we are just pessimizing the code by making an i1 phi.
- if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
- return NV;
- break;
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
+ // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
+ // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
+ if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP())) {
+ I.setOperand(1, ConstantFP::getNullValue(Op1->getType()));
+ return &I;
+ }
+
+ // Handle fcmp with instruction LHS and constant RHS.
+ Instruction *LHSI;
+ Constant *RHSC;
+ if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) {
+ switch (LHSI->getOpcode()) {
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
return NV;
- break;
- case Instruction::FSub: {
- // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
- Value *Op;
- if (match(LHSI, m_FNeg(m_Value(Op))))
- return new FCmpInst(I.getSwappedPredicate(), Op,
- ConstantExpr::getFNeg(RHSC));
- break;
- }
- case Instruction::Load:
- if (GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !cast<LoadInst>(LHSI)->isVolatile())
- if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
- return Res;
- }
- break;
- case Instruction::Call: {
- if (!RHSC->isNullValue())
- break;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::FDiv:
+ if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::Load:
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
+ if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ break;
+ }
+ }
- CallInst *CI = cast<CallInst>(LHSI);
- Intrinsic::ID IID = getIntrinsicForCallSite(CI, &TLI);
- if (IID != Intrinsic::fabs)
- break;
+ if (Instruction *R = foldFabsWithFcmpZero(I))
+ return R;
- // Various optimization for fabs compared with zero.
- switch (Pred) {
- default:
- break;
- // fabs(x) < 0 --> false
- case FCmpInst::FCMP_OLT:
- llvm_unreachable("handled by SimplifyFCmpInst");
- // fabs(x) > 0 --> x != 0
- case FCmpInst::FCMP_OGT:
- return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC);
- // fabs(x) <= 0 --> x == 0
- case FCmpInst::FCMP_OLE:
- return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC);
- // fabs(x) >= 0 --> !isnan(x)
- case FCmpInst::FCMP_OGE:
- return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC);
- // fabs(x) == 0 --> x == 0
- // fabs(x) != 0 --> x != 0
- case FCmpInst::FCMP_OEQ:
- case FCmpInst::FCMP_UEQ:
- case FCmpInst::FCMP_ONE:
- case FCmpInst::FCMP_UNE:
- return new FCmpInst(Pred, CI->getArgOperand(0), RHSC);
- }
- }
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X)))) {
+ // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
+ if (match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
+
+ // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
+ Constant *C;
+ if (match(Op1, m_Constant(C))) {
+ Constant *NegC = ConstantExpr::getFNeg(C);
+ return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I);
+ }
+ }
+
+ if (match(Op0, m_FPExt(m_Value(X)))) {
+ // fcmp (fpext X), (fpext Y) -> fcmp X, Y
+ if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
+ return new FCmpInst(Pred, X, Y, "", &I);
+
+ // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
+ const APFloat *C;
+ if (match(Op1, m_APFloat(C))) {
+ const fltSemantics &FPSem =
+ X->getType()->getScalarType()->getFltSemantics();
+ bool Lossy;
+ APFloat TruncC = *C;
+ TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy);
+
+ // Avoid lossy conversions and denormals.
+ // Zero is a special case that's OK to convert.
+ APFloat Fabs = TruncC;
+ Fabs.clearSign();
+ if (!Lossy &&
+ ((Fabs.compare(APFloat::getSmallestNormalized(FPSem)) !=
+ APFloat::cmpLessThan) || Fabs.isZero())) {
+ Constant *NewC = ConstantFP::get(X->getType(), TruncC);
+ return new FCmpInst(Pred, X, NewC, "", &I);
}
+ }
}
- // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
- Value *X, *Y;
- if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
- return new FCmpInst(I.getSwappedPredicate(), X, Y);
-
- // fcmp (fpext x), (fpext y) -> fcmp x, y
- if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
- if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
- if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
- return new FCmpInst(Pred, LHSExt->getOperand(0), RHSExt->getOperand(0));
+ if (I.getType()->isVectorTy())
+ if (Instruction *Res = foldVectorCmp(I, Builder))
+ return Res;
return Changed ? &I : nullptr;
}
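The losslessness test used for the fcmp (fpext X), C fold above corresponds to a simple round-trip-plus-denormal check. A plain-C++ sketch with a hypothetical losslessAsFloat helper (not part of the patch):

    #include <cassert>
    #include <cfloat>
    #include <cmath>
    #include <limits>

    // A double constant C may be moved across an fpext from float only if the
    // double->float round-trip is exact and the truncated value is not a
    // denormal (zero is explicitly allowed).
    static bool losslessAsFloat(double C) {
      float T = static_cast<float>(C);                       // fptrunc C
      bool Exact = (static_cast<double>(T) == C);
      bool NormalOrZero = std::fabs(T) >= FLT_MIN || T == 0.0f;
      return Exact && NormalOrZero;
    }

    int main() {
      assert(losslessAsFloat(0.5));                          // exact in float
      assert(losslessAsFloat(0.0));                          // zero is allowed
      assert(!losslessAsFloat(0.1));                         // rounds differently
      assert(!losslessAsFloat(1.0 + DBL_EPSILON));           // needs 53 mantissa bits
      // Exact, but lands on a float denormal, so it is rejected too.
      assert(!losslessAsFloat(std::numeric_limits<float>::denorm_min()));
      return 0;
    }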