| author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
| commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
| tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Transforms/InstCombine/InstCombineCompares.cpp | |
| parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) | |
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineCompares.cpp')
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineCompares.cpp | 669 |
1 file changed, 459 insertions, 210 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6de92a4842ab..b5bbb09935e2 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -522,11 +522,9 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
   }
 
   // Otherwise, there is an index. The computation we will do will be modulo
-  // the pointer size, so get it.
-  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
-  Offset &= PtrSizeMask;
-  VariableScale &= PtrSizeMask;
+  // the pointer size.
+  Offset = SignExtend64(Offset, IntPtrWidth);
+  VariableScale = SignExtend64(VariableScale, IntPtrWidth);
 
   // To do this transformation, any constant index must be a multiple of the
   // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
@@ -909,7 +907,8 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
     }
 
     // If all indices are the same, just compare the base pointers.
-    if (IndicesTheSame)
+    Type *BaseType = GEPLHS->getOperand(0)->getType();
+    if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
       return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
 
     // If we're comparing GEPs with two base pointers that only differ in type
@@ -976,7 +975,7 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
 
     if (NumDifferences == 0)   // SAME GEP?
       return replaceInstUsesWith(I, // No comparison is needed here.
-        Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond)));
+        ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond)));
 
     else if (NumDifferences == 1 && GEPsInBounds) {
       Value *LHSV = GEPLHS->getOperand(DiffOperand);
@@ -1079,19 +1078,20 @@ Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
       ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
 }
 
-/// Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
+/// Fold "icmp pred (X+C), X".
+Instruction *InstCombiner::foldICmpAddOpConst(Value *X, const APInt &C,
                                               ICmpInst::Predicate Pred) {
   // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
   // so the values can never be equal. Similarly for all other "or equals"
   // operators.
+  assert(!!C && "C should not be zero!");
 
   // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
   // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
   // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
   if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
-    Value *R =
-        ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+    Constant *R = ConstantInt::get(X->getType(),
+                                   APInt::getMaxValue(C.getBitWidth()) - C);
     return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
   }
 
@@ -1099,11 +1099,10 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
   // (X+2) >u X        --> X <u (0-2)        --> X <u 254
   // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
   if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
-    return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+    return new ICmpInst(ICmpInst::ICMP_ULT, X,
+                        ConstantInt::get(X->getType(), -C));
 
-  unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
-  ConstantInt *SMax = ConstantInt::get(X->getContext(),
-                                       APInt::getSignedMaxValue(BitWidth));
+  APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());
 
   // (X+ 1) <s X       --> X >s (MAXSINT-1)          --> X == 127
   // (X+ 2) <s X       --> X >s (MAXSINT-2)          --> X >s 125
@@ -1112,7 +1111,8 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
   // (X+ -2) <s X      --> X >s (MAXSINT- -2)        --> X >s 126
   // (X+ -1) <s X      --> X >s (MAXSINT- -1)        --> X != 127
   if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
-    return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+    return new ICmpInst(ICmpInst::ICMP_SGT, X,
+                        ConstantInt::get(X->getType(), SMax - C));
 
   // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))      --> X != 127
   // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))      --> X <s 126
@@ -1122,8 +1122,8 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
   // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))     --> X == -128
 
   assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
-  Constant *C = Builder.getInt(CI->getValue() - 1);
-  return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+  return new ICmpInst(ICmpInst::ICMP_SLT, X,
+                      ConstantInt::get(X->getType(), SMax - (C - 1)));
 }
 
 /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
@@ -1333,17 +1333,12 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
   return nullptr;
 }
 
-// Fold icmp Pred X, C.
+/// Fold icmp Pred X, C.
+/// TODO: This code structure does not make sense. The saturating add fold
+/// should be moved to some other helper and extended as noted below (it is also
+/// possible that code has been made unnecessary - do we canonicalize IR to
+/// overflow/saturating intrinsics or not?).
 Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
-  CmpInst::Predicate Pred = Cmp.getPredicate();
-  Value *X = Cmp.getOperand(0);
-
-  const APInt *C;
-  if (!match(Cmp.getOperand(1), m_APInt(C)))
-    return nullptr;
-
-  Value *A = nullptr, *B = nullptr;
-
   // Match the following pattern, which is a common idiom when writing
   // overflow-safe integer arithmetic functions. The source performs an addition
   // in wider type and explicitly checks for overflow using comparisons against
@@ -1355,37 +1350,62 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
   //
   // sum = a + b
   // if (sum+128 >u 255)  ...  -> llvm.sadd.with.overflow.i8
-  {
-    ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
-    if (Pred == ICmpInst::ICMP_UGT &&
-        match(X, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
-      if (Instruction *Res = processUGT_ADDCST_ADD(
-              Cmp, A, B, CI2, cast<ConstantInt>(Cmp.getOperand(1)), *this))
-        return Res;
-  }
+  CmpInst::Predicate Pred = Cmp.getPredicate();
+  Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1);
+  Value *A, *B;
+  ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+  if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) &&
+      match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+    if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this))
+      return Res;
+
+  return nullptr;
+}
 
-  // FIXME: Use m_APInt to allow folds for splat constants.
-  ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1));
-  if (!CI)
+/// Canonicalize icmp instructions based on dominating conditions.
+Instruction *InstCombiner::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
+  // This is a cheap/incomplete check for dominance - just match a single
+  // predecessor with a conditional branch.
+  BasicBlock *CmpBB = Cmp.getParent();
+  BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+  if (!DomBB)
     return nullptr;
 
-  // Canonicalize icmp instructions based on dominating conditions.
-  BasicBlock *Parent = Cmp.getParent();
-  BasicBlock *Dom = Parent->getSinglePredecessor();
-  auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
-  ICmpInst::Predicate Pred2;
+  Value *DomCond;
   BasicBlock *TrueBB, *FalseBB;
-  ConstantInt *CI2;
-  if (BI && match(BI, m_Br(m_ICmp(Pred2, m_Specific(X), m_ConstantInt(CI2)),
-                           TrueBB, FalseBB)) &&
-      TrueBB != FalseBB) {
-    ConstantRange CR =
-        ConstantRange::makeAllowedICmpRegion(Pred, CI->getValue());
+  if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+    return nullptr;
+
+  assert((TrueBB == CmpBB || FalseBB == CmpBB) &&
+         "Predecessor block does not point to successor?");
+
+  // The branch should get simplified. Don't bother simplifying this condition.
+  if (TrueBB == FalseBB)
+    return nullptr;
+
+  // Try to simplify this compare to T/F based on the dominating condition.
+  Optional<bool> Imp = isImpliedCondition(DomCond, &Cmp, DL, TrueBB == CmpBB);
+  if (Imp)
+    return replaceInstUsesWith(Cmp, ConstantInt::get(Cmp.getType(), *Imp));
+
+  CmpInst::Predicate Pred = Cmp.getPredicate();
+  Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
+  ICmpInst::Predicate DomPred;
+  const APInt *C, *DomC;
+  if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))) &&
+      match(Y, m_APInt(C))) {
+    // We have 2 compares of a variable with constants. Calculate the constant
+    // ranges of those compares to see if we can transform the 2nd compare:
+    // DomBB:
+    //   DomCond = icmp DomPred X, DomC
+    //   br DomCond, CmpBB, FalseBB
+    // CmpBB:
+    //   Cmp = icmp Pred X, C
+    ConstantRange CR = ConstantRange::makeAllowedICmpRegion(Pred, *C);
     ConstantRange DominatingCR =
-        (Parent == TrueBB)
-            ? ConstantRange::makeExactICmpRegion(Pred2, CI2->getValue())
-            : ConstantRange::makeExactICmpRegion(
-                  CmpInst::getInversePredicate(Pred2), CI2->getValue());
+        (CmpBB == TrueBB) ? ConstantRange::makeExactICmpRegion(DomPred, *DomC)
+                          : ConstantRange::makeExactICmpRegion(
+                                CmpInst::getInversePredicate(DomPred), *DomC);
     ConstantRange Intersection = DominatingCR.intersectWith(CR);
     ConstantRange Difference = DominatingCR.difference(CR);
     if (Intersection.isEmptySet())
@@ -1393,23 +1413,20 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
     if (Difference.isEmptySet())
       return replaceInstUsesWith(Cmp, Builder.getTrue());
 
-    // If this is a normal comparison, it demands all bits. If it is a sign
-    // bit comparison, it only demands the sign bit.
-    bool UnusedBit;
-    bool IsSignBit = isSignBitCheck(Pred, CI->getValue(), UnusedBit);
-
     // Canonicalizing a sign bit comparison that gets used in a branch,
     // pessimizes codegen by generating branch on zero instruction instead
    // of a test and branch. So we avoid canonicalizing in such situations
     // because test and branch instruction has better branch displacement
     // than compare and branch instruction.
+    bool UnusedBit;
+    bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
     if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
       return nullptr;
 
-    if (auto *AI = Intersection.getSingleElement())
-      return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI));
-    if (auto *AD = Difference.getSingleElement())
-      return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD));
+    if (const APInt *EqC = Intersection.getSingleElement())
+      return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
+    if (const APInt *NeC = Difference.getSingleElement())
+      return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
   }
 
   return nullptr;
@@ -1498,16 +1515,25 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
     }
   }
 
-  // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
-  //   iff -C is a power of 2
-  if (Pred == ICmpInst::ICMP_UGT && *XorC == ~C && (C + 1).isPowerOf2())
-    return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
-
-  // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
-  //   iff -C is a power of 2
-  if (Pred == ICmpInst::ICMP_ULT && *XorC == -C && C.isPowerOf2())
-    return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
-
+  // Mask constant magic can eliminate an 'xor' with unsigned compares.
+  if (Pred == ICmpInst::ICMP_UGT) {
+    // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
+    if (*XorC == ~C && (C + 1).isPowerOf2())
+      return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
+    // (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
+    if (*XorC == C && (C + 1).isPowerOf2())
+      return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
+  }
+  if (Pred == ICmpInst::ICMP_ULT) {
+    // (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
+    if (*XorC == -C && C.isPowerOf2())
+      return new ICmpInst(ICmpInst::ICMP_UGT, X,
+                          ConstantInt::get(X->getType(), ~C));
+    // (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
+    if (*XorC == C && (-C).isPowerOf2())
+      return new ICmpInst(ICmpInst::ICMP_UGT, X,
+                          ConstantInt::get(X->getType(), ~C));
+  }
   return nullptr;
 }
 
@@ -1598,6 +1624,13 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
 Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
                                                  BinaryOperator *And,
                                                  const APInt &C1) {
+  // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
+  // TODO: We canonicalize to the longer form for scalars because we have
+  // better analysis/folds for icmp, and codegen may be better with icmp.
+  if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() &&
+      C1.isNullValue() && match(And->getOperand(1), m_One()))
+    return new TruncInst(And->getOperand(0), Cmp.getType());
+
   const APInt *C2;
   if (!match(And->getOperand(1), m_APInt(C2)))
     return nullptr;
@@ -2336,13 +2369,19 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
   Type *Ty = Add->getType();
   CmpInst::Predicate Pred = Cmp.getPredicate();
 
+  if (!Add->hasOneUse())
+    return nullptr;
+
   // If the add does not wrap, we can always adjust the compare by subtracting
-  // the constants. Equality comparisons are handled elsewhere. SGE/SLE are
-  // canonicalized to SGT/SLT.
-  if (Add->hasNoSignedWrap() &&
-      (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) {
+  // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
+  // are canonicalized to SGT/SLT/UGT/ULT.
+  if ((Add->hasNoSignedWrap() &&
+       (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
+      (Add->hasNoUnsignedWrap() &&
+       (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
     bool Overflow;
-    APInt NewC = C.ssub_ov(*C2, Overflow);
+    APInt NewC =
+        Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow);
     // If there is overflow, the result must be true or false.
     // TODO: Can we assert there is no overflow because InstSimplify always
     // handles those cases?
@@ -2366,9 +2405,6 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
       return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
   }
 
-  if (!Add->hasOneUse())
-    return nullptr;
-
   // X+C <u C2 -> (X & -C2) == C
   //   iff C & (C2-1) == 0
   //       C2 is a power of 2
@@ -2729,6 +2765,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
 
   // Handle icmp {eq|ne} <intrinsic>, Constant.
   Type *Ty = II->getType();
+  unsigned BitWidth = C.getBitWidth();
   switch (II->getIntrinsicID()) {
   case Intrinsic::bswap:
     Worklist.Add(II);
@@ -2737,21 +2774,39 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
     return &Cmp;
 
   case Intrinsic::ctlz:
-  case Intrinsic::cttz:
+  case Intrinsic::cttz: {
     // ctz(A) == bitwidth(A)  ->  A == 0 and likewise for !=
-    if (C == C.getBitWidth()) {
+    if (C == BitWidth) {
       Worklist.Add(II);
      Cmp.setOperand(0, II->getArgOperand(0));
       Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
       return &Cmp;
     }
+
+    // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
+    // and Mask1 has bits 0..C+1 set. Similar for ctl, but for high bits.
+    // Limit to one use to ensure we don't increase instruction count.
+    unsigned Num = C.getLimitedValue(BitWidth);
+    if (Num != BitWidth && II->hasOneUse()) {
+      bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
+      APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
+                               : APInt::getHighBitsSet(BitWidth, Num + 1);
+      APInt Mask2 = IsTrailing
+                        ? APInt::getOneBitSet(BitWidth, Num)
+                        : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
+      Cmp.setOperand(0, Builder.CreateAnd(II->getArgOperand(0), Mask1));
+      Cmp.setOperand(1, ConstantInt::get(Ty, Mask2));
+      Worklist.Add(II);
+      return &Cmp;
+    }
     break;
+  }
 
   case Intrinsic::ctpop: {
     // popcount(A) == 0  ->  A == 0 and likewise for !=
     // popcount(A) == bitwidth(A)  ->  A == -1 and likewise for !=
     bool IsZero = C.isNullValue();
-    if (IsZero || C == C.getBitWidth()) {
+    if (IsZero || C == BitWidth) {
       Worklist.Add(II);
       Cmp.setOperand(0, II->getArgOperand(0));
       auto *NewOp =
@@ -2870,15 +2925,25 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
 /// In this case, we are looking for comparisons that look like
 /// a check for a lossy truncation.
 /// Folds:
-///   x & (-1 >> y) SrcPred x    to    x DstPred (-1 >> y)
+///   icmp SrcPred (x & Mask), x    to    icmp DstPred x, Mask
+/// Where Mask is some pattern that produces all-ones in low bits:
+///    (-1 >> y)
+///    ((-1 << y) >> y)     <- non-canonical, has extra uses
+///    ~(-1 << y)
+///    ((1 << y) + (-1))    <- non-canonical, has extra uses
 /// The Mask can be a constant, too.
 /// For some predicates, the operands are commutative.
 /// For others, x can only be on a specific side.
 static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
                                           InstCombiner::BuilderTy &Builder) {
   ICmpInst::Predicate SrcPred;
-  Value *X, *M;
-  auto m_Mask = m_CombineOr(m_LShr(m_AllOnes(), m_Value()), m_LowBitMask());
+  Value *X, *M, *Y;
+  auto m_VariableMask = m_CombineOr(
+      m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
+                  m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
+      m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
                   m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
+  auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
   if (!match(&I, m_c_ICmp(SrcPred,
                           m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
                           m_Deferred(X))))
@@ -2924,12 +2989,20 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
     // x & (-1 >> y) s>= x    ->    x s<= (-1 >> y)
     if (X != I.getOperand(1)) // X must be on RHS of comparison!
       return nullptr;         // Ignore the other case.
+    if (!match(M, m_Constant())) // Can not do this fold with non-constant.
+      return nullptr;
+    if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+      return nullptr;
     DstPred = ICmpInst::Predicate::ICMP_SLE;
     break;
   case ICmpInst::Predicate::ICMP_SLT:
     // x & (-1 >> y) s< x    ->    x s> (-1 >> y)
    if (X != I.getOperand(1)) // X must be on RHS of comparison!
       return nullptr;         // Ignore the other case.
+    if (!match(M, m_Constant())) // Can not do this fold with non-constant.
+      return nullptr;
+    if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+      return nullptr;
     DstPred = ICmpInst::Predicate::ICMP_SGT;
     break;
   case ICmpInst::Predicate::ICMP_SLE:
@@ -3034,6 +3107,18 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
     return nullptr;
 
   const CmpInst::Predicate Pred = I.getPredicate();
+  Value *X;
+
+  // Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
+  // (Op1 + X) <u Op1 --> ~Op1 <u X
+  // Op0 >u (Op0 + X) --> X >u ~Op0
+  if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
+      Pred == ICmpInst::ICMP_ULT)
+    return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
+  if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
+      Pred == ICmpInst::ICMP_UGT)
+    return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
+
   bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
   if (BO0 && isa<OverflowingBinaryOperator>(BO0))
     NoOp0WrapProblem =
@@ -4598,6 +4683,83 @@ static Instruction *canonicalizeICmpBool(ICmpInst &I,
   }
 }
 
+// Transform pattern like:
+//   (1 << Y) u<= X  or  ~(-1 << Y) u< X  or  ((1 << Y)+(-1)) u< X
+//   (1 << Y) u>  X  or  ~(-1 << Y) u>= X  or  ((1 << Y)+(-1)) u>= X
+// Into:
+//   (X l>> Y) != 0
+//   (X l>> Y) == 0
+static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp,
+                                            InstCombiner::BuilderTy &Builder) {
+  ICmpInst::Predicate Pred, NewPred;
+  Value *X, *Y;
+  if (match(&Cmp,
+            m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) {
+    // We want X to be the icmp's second operand, so swap predicate if it isn't.
+    if (Cmp.getOperand(0) == X)
+      Pred = Cmp.getSwappedPredicate();
+
+    switch (Pred) {
+    case ICmpInst::ICMP_ULE:
+      NewPred = ICmpInst::ICMP_NE;
+      break;
+    case ICmpInst::ICMP_UGT:
+      NewPred = ICmpInst::ICMP_EQ;
+      break;
+    default:
+      return nullptr;
+    }
+  } else if (match(&Cmp, m_c_ICmp(Pred,
+                                  m_OneUse(m_CombineOr(
+                                      m_Not(m_Shl(m_AllOnes(), m_Value(Y))),
+                                      m_Add(m_Shl(m_One(), m_Value(Y)),
+                                            m_AllOnes()))),
+                                  m_Value(X)))) {
+    // The variant with 'add' is not canonical, (the variant with 'not' is)
+    // we only get it because it has extra uses, and can't be canonicalized,
+
+    // We want X to be the icmp's second operand, so swap predicate if it isn't.
+    if (Cmp.getOperand(0) == X)
+      Pred = Cmp.getSwappedPredicate();
+
+    switch (Pred) {
+    case ICmpInst::ICMP_ULT:
+      NewPred = ICmpInst::ICMP_NE;
+      break;
+    case ICmpInst::ICMP_UGE:
+      NewPred = ICmpInst::ICMP_EQ;
+      break;
+    default:
+      return nullptr;
+    }
+  } else
+    return nullptr;
+
+  Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits");
+  Constant *Zero = Constant::getNullValue(NewX->getType());
+  return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero);
+}
+
+static Instruction *foldVectorCmp(CmpInst &Cmp,
+                                  InstCombiner::BuilderTy &Builder) {
+  // If both arguments of the cmp are shuffles that use the same mask and
+  // shuffle within a single vector, move the shuffle after the cmp.
+  Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
+  Value *V1, *V2;
+  Constant *M;
+  if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(M))) &&
+      match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(M))) &&
+      V1->getType() == V2->getType() &&
+      (LHS->hasOneUse() || RHS->hasOneUse())) {
+    // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M
+    CmpInst::Predicate P = Cmp.getPredicate();
+    Value *NewCmp = isa<ICmpInst>(Cmp) ? Builder.CreateICmp(P, V1, V2)
+                                       : Builder.CreateFCmp(P, V1, V2);
+    return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M);
+  }
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   bool Changed = false;
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -4645,6 +4807,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   if (Instruction *Res = foldICmpWithConstant(I))
     return Res;
 
+  if (Instruction *Res = foldICmpWithDominatingICmp(I))
+    return Res;
+
   if (Instruction *Res = foldICmpUsingKnownBits(I))
     return Res;
 
@@ -4857,16 +5022,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       return ExtractValueInst::Create(ACXI, 1);
 
   {
-    Value *X; ConstantInt *Cst;
+    Value *X;
+    const APInt *C;
     // icmp X+Cst, X
-    if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
-      return foldICmpAddOpConst(X, Cst, I.getPredicate());
+    if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
+      return foldICmpAddOpConst(X, *C, I.getPredicate());
 
     // icmp X, X+Cst
-    if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
-      return foldICmpAddOpConst(X, Cst, I.getSwappedPredicate());
+    if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
+      return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
   }
+
+  if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
+    return Res;
+
+  if (I.getType()->isVectorTy())
+    if (Instruction *Res = foldVectorCmp(I, Builder))
+      return Res;
+
   return Changed ? &I : nullptr;
 }
 
@@ -5109,6 +5282,117 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
   return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
 }
 
+/// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary.
+static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
+                                              Constant *RHSC) {
+  // When C is not 0.0 and infinities are not allowed:
+  // (C / X) < 0.0 is a sign-bit test of X
+  // (C / X) < 0.0 --> X < 0.0 (if C is positive)
+  // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate)
+  //
+  // Proof:
+  // Multiply (C / X) < 0.0 by X * X / C.
+  // - X is non zero, if it is the flag 'ninf' is violated.
+  // - C defines the sign of X * X * C. Thus it also defines whether to swap
+  //   the predicate. C is also non zero by definition.
+  //
+  // Thus X * X / C is non zero and the transformation is valid. [qed]
+
+  FCmpInst::Predicate Pred = I.getPredicate();
+
+  // Check that predicates are valid.
+  if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
+      (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
+    return nullptr;
+
+  // Check that RHS operand is zero.
+  if (!match(RHSC, m_AnyZeroFP()))
+    return nullptr;
+
+  // Check fastmath flags ('ninf').
+  if (!LHSI->hasNoInfs() || !I.hasNoInfs())
+    return nullptr;
+
+  // Check the properties of the dividend. It must not be zero to avoid a
+  // division by zero (see Proof).
+  const APFloat *C;
+  if (!match(LHSI->getOperand(0), m_APFloat(C)))
+    return nullptr;
+
+  if (C->isZero())
+    return nullptr;
+
+  // Get swapped predicate if necessary.
+  if (C->isNegative())
+    Pred = I.getSwappedPredicate();
+
+  return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I);
+}
+
+/// Optimize fabs(X) compared with zero.
+static Instruction *foldFabsWithFcmpZero(FCmpInst &I) {
+  Value *X;
+  if (!match(I.getOperand(0), m_Intrinsic<Intrinsic::fabs>(m_Value(X))) ||
+      !match(I.getOperand(1), m_PosZeroFP()))
+    return nullptr;
+
+  auto replacePredAndOp0 = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
+    I->setPredicate(P);
+    I->setOperand(0, X);
+    return I;
+  };
+
+  switch (I.getPredicate()) {
+  case FCmpInst::FCMP_UGE:
+  case FCmpInst::FCMP_OLT:
+    // fabs(X) >= 0.0 --> true
+    // fabs(X) <  0.0 --> false
+    llvm_unreachable("fcmp should have simplified");
+
+  case FCmpInst::FCMP_OGT:
+    // fabs(X) > 0.0 --> X != 0.0
+    return replacePredAndOp0(&I, FCmpInst::FCMP_ONE, X);
+
+  case FCmpInst::FCMP_UGT:
+    // fabs(X) u> 0.0 --> X u!= 0.0
+    return replacePredAndOp0(&I, FCmpInst::FCMP_UNE, X);
+
+  case FCmpInst::FCMP_OLE:
+    // fabs(X) <= 0.0 --> X == 0.0
+    return replacePredAndOp0(&I, FCmpInst::FCMP_OEQ, X);
+
+  case FCmpInst::FCMP_ULE:
+    // fabs(X) u<= 0.0 --> X u== 0.0
+    return replacePredAndOp0(&I, FCmpInst::FCMP_UEQ, X);
+
+  case FCmpInst::FCMP_OGE:
+    // fabs(X) >= 0.0 --> !isnan(X)
+    assert(!I.hasNoNaNs() && "fcmp should have simplified");
+    return replacePredAndOp0(&I, FCmpInst::FCMP_ORD, X);
+
+  case FCmpInst::FCMP_ULT:
+    // fabs(X) u< 0.0 --> isnan(X)
+    assert(!I.hasNoNaNs() && "fcmp should have simplified");
+    return replacePredAndOp0(&I, FCmpInst::FCMP_UNO, X);
+
+  case FCmpInst::FCMP_OEQ:
+  case FCmpInst::FCMP_UEQ:
+  case FCmpInst::FCMP_ONE:
+  case FCmpInst::FCMP_UNE:
+  case FCmpInst::FCMP_ORD:
+  case FCmpInst::FCMP_UNO:
+    // Look through the fabs() because it doesn't change anything but the sign.
+    // fabs(X) == 0.0 --> X == 0.0,
+    // fabs(X) != 0.0 --> X != 0.0
+    // isnan(fabs(X)) --> isnan(X)
+    // !isnan(fabs(X) --> !isnan(X)
    return replacePredAndOp0(&I, I.getPredicate(), X);

  default:
    return nullptr;
  }
+}
+
 Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   bool Changed = false;
 
@@ -5153,11 +5437,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   // If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
   // then canonicalize the operand to 0.0.
   if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
-    if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0)) {
+    if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI)) {
       I.setOperand(0, ConstantFP::getNullValue(Op0->getType()));
       return &I;
     }
 
-    if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1)) {
+    if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI)) {
       I.setOperand(1, ConstantFP::getNullValue(Op0->getType()));
       return &I;
     }
@@ -5178,128 +5462,93 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     return nullptr;
   }
 
-  // Handle fcmp with constant RHS
-  if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
-    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
-      switch (LHSI->getOpcode()) {
-      case Instruction::FPExt: {
-        // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
-        FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
-        ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
-        if (!RHSF)
-          break;
-
-        const fltSemantics *Sem;
-        // FIXME: This shouldn't be here.
-        if (LHSExt->getSrcTy()->isHalfTy())
-          Sem = &APFloat::IEEEhalf();
-        else if (LHSExt->getSrcTy()->isFloatTy())
-          Sem = &APFloat::IEEEsingle();
-        else if (LHSExt->getSrcTy()->isDoubleTy())
-          Sem = &APFloat::IEEEdouble();
-        else if (LHSExt->getSrcTy()->isFP128Ty())
-          Sem = &APFloat::IEEEquad();
-        else if (LHSExt->getSrcTy()->isX86_FP80Ty())
-          Sem = &APFloat::x87DoubleExtended();
-        else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
-          Sem = &APFloat::PPCDoubleDouble();
-        else
-          break;
-
-        bool Lossy;
-        APFloat F = RHSF->getValueAPF();
-        F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
-
-        // Avoid lossy conversions and denormals. Zero is a special case
-        // that's OK to convert.
-        APFloat Fabs = F;
-        Fabs.clearSign();
-        if (!Lossy &&
-            ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
-              APFloat::cmpLessThan) || Fabs.isZero()))
-
-          return new FCmpInst(Pred, LHSExt->getOperand(0),
-                              ConstantFP::get(RHSC->getContext(), F));
-        break;
-      }
-      case Instruction::PHI:
-        // Only fold fcmp into the PHI if the phi and fcmp are in the same
-        // block. If in the same block, we're encouraging jump threading. If
-        // not, we are just pessimizing the code by making an i1 phi.
-        if (LHSI->getParent() == I.getParent())
-          if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
-            return NV;
-        break;
-      case Instruction::SIToFP:
-      case Instruction::UIToFP:
-        if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
+  // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
+  // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
+  if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP())) {
+    I.setOperand(1, ConstantFP::getNullValue(Op1->getType()));
+    return &I;
+  }
+
+  // Handle fcmp with instruction LHS and constant RHS.
+  Instruction *LHSI;
+  Constant *RHSC;
+  if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) {
+    switch (LHSI->getOpcode()) {
+    case Instruction::PHI:
+      // Only fold fcmp into the PHI if the phi and fcmp are in the same
+      // block. If in the same block, we're encouraging jump threading. If
+      // not, we are just pessimizing the code by making an i1 phi.
+      if (LHSI->getParent() == I.getParent())
+        if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
          return NV;
-        break;
-      case Instruction::FSub: {
-        // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
-        Value *Op;
-        if (match(LHSI, m_FNeg(m_Value(Op))))
-          return new FCmpInst(I.getSwappedPredicate(), Op,
-                              ConstantExpr::getFNeg(RHSC));
-        break;
-      }
-      case Instruction::Load:
-        if (GetElementPtrInst *GEP =
-                dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
-          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
-            if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
-                !cast<LoadInst>(LHSI)->isVolatile())
-              if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
-                return Res;
-        }
-        break;
-      case Instruction::Call: {
-        if (!RHSC->isNullValue())
-          break;
+      break;
+    case Instruction::SIToFP:
+    case Instruction::UIToFP:
+      if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
+        return NV;
+      break;
+    case Instruction::FDiv:
+      if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC))
+        return NV;
+      break;
+    case Instruction::Load:
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
+        if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+          if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+              !cast<LoadInst>(LHSI)->isVolatile())
+            if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
+              return Res;
+      break;
+    }
+  }
 
-        CallInst *CI = cast<CallInst>(LHSI);
-        Intrinsic::ID IID = getIntrinsicForCallSite(CI, &TLI);
-        if (IID != Intrinsic::fabs)
-          break;
+  if (Instruction *R = foldFabsWithFcmpZero(I))
+    return R;
 
-        // Various optimization for fabs compared with zero.
-        switch (Pred) {
-        default:
-          break;
-        // fabs(x) < 0 --> false
-        case FCmpInst::FCMP_OLT:
-          llvm_unreachable("handled by SimplifyFCmpInst");
-        // fabs(x) > 0 --> x != 0
-        case FCmpInst::FCMP_OGT:
-          return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC);
-        // fabs(x) <= 0 --> x == 0
-        case FCmpInst::FCMP_OLE:
-          return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC);
-        // fabs(x) >= 0 --> !isnan(x)
-        case FCmpInst::FCMP_OGE:
-          return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC);
-        // fabs(x) == 0 --> x == 0
-        // fabs(x) != 0 --> x != 0
-        case FCmpInst::FCMP_OEQ:
-        case FCmpInst::FCMP_UEQ:
-        case FCmpInst::FCMP_ONE:
-        case FCmpInst::FCMP_UNE:
-          return new FCmpInst(Pred, CI->getArgOperand(0), RHSC);
-        }
-      }
+  Value *X, *Y;
+  if (match(Op0, m_FNeg(m_Value(X)))) {
+    // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
+    if (match(Op1, m_FNeg(m_Value(Y))))
+      return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
+
+    // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
+    Constant *C;
+    if (match(Op1, m_Constant(C))) {
+      Constant *NegC = ConstantExpr::getFNeg(C);
+      return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I);
+    }
+  }
+
+  if (match(Op0, m_FPExt(m_Value(X)))) {
+    // fcmp (fpext X), (fpext Y) -> fcmp X, Y
+    if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
+      return new FCmpInst(Pred, X, Y, "", &I);
+
+    // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
+    const APFloat *C;
+    if (match(Op1, m_APFloat(C))) {
+      const fltSemantics &FPSem =
+          X->getType()->getScalarType()->getFltSemantics();
+      bool Lossy;
+      APFloat TruncC = *C;
+      TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy);
+
+      // Avoid lossy conversions and denormals.
+      // Zero is a special case that's OK to convert.
+      APFloat Fabs = TruncC;
+      Fabs.clearSign();
+      if (!Lossy &&
+          ((Fabs.compare(APFloat::getSmallestNormalized(FPSem)) !=
+            APFloat::cmpLessThan) || Fabs.isZero())) {
+        Constant *NewC = ConstantFP::get(X->getType(), TruncC);
+        return new FCmpInst(Pred, X, NewC, "", &I);
       }
+    }
   }
 
-  // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
-  Value *X, *Y;
-  if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
-    return new FCmpInst(I.getSwappedPredicate(), X, Y);
-
-  // fcmp (fpext x), (fpext y) -> fcmp x, y
-  if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
-    if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
-      if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
-        return new FCmpInst(Pred, LHSExt->getOperand(0), RHSExt->getOperand(0));
+  if (I.getType()->isVectorTy())
+    if (Instruction *Res = foldVectorCmp(I, Builder))
+      return Res;
 
   return Changed ? &I : nullptr;
 }
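To make two of the new folds easier to see, here is a small hand-written LLVM IR sketch. It is not part of this commit or its test suite; the function names and values are invented for illustration, and the "expected" forms are taken directly from the comments in foldICmpWithHighBitMask and foldFabsWithFcmpZero above.

```llvm
; Hypothetical input for the new high-bit-mask fold:
;   (1 << Y) u<= X   is expected to become   (X l>> Y) != 0
define i1 @shl_one_ule(i32 %x, i32 %y) {
  %mask = shl i32 1, %y
  %cmp = icmp ule i32 %mask, %x   ; expected: lshr %x, %y; icmp ne ..., 0
  ret i1 %cmp
}

; Hypothetical input for the fabs-vs-zero fold:
;   fabs(X) > 0.0   is expected to become   fcmp one X, 0.0
declare double @llvm.fabs.f64(double)

define i1 @fabs_ogt_zero(double %x) {
  %a = call double @llvm.fabs.f64(double %x)
  %cmp = fcmp ogt double %a, 0.0  ; expected: fcmp one double %x, 0.0
  ret i1 %cmp
}
```

On a tree that contains this patch, running `opt -instcombine` over such a file should rewrite the compares as indicated by the patch comments; the exact output names may differ.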
