path: root/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineAndOrXor.cpp')
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 534
1 file changed, 456 insertions(+), 78 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2364202e5b69..372bc41f780e 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -14,10 +14,10 @@
#include "InstCombineInternal.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
@@ -75,7 +75,7 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
return Builder.CreateFCmp(Pred, LHS, RHS);
}
-/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
+/// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B))
/// \param I Binary operator to transform.
/// \return Pointer to node that must replace the original binary operator, or
@@ -305,17 +305,21 @@ static bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pre
}
/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
-/// Return the set of pattern classes (from MaskedICmpType) that both LHS and
-/// RHS satisfy.
-static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
- Value *&D, Value *&E, ICmpInst *LHS,
- ICmpInst *RHS,
- ICmpInst::Predicate &PredL,
- ICmpInst::Predicate &PredR) {
+/// Return the pattern classes (from MaskedICmpType) for the left-hand side
+/// and the right-hand side as a pair.
+/// LHS and RHS are the left-hand side and right-hand side ICmps and PredL and
+/// PredR are their predicates, respectively.
+static Optional<std::pair<unsigned, unsigned>>
+getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, Value *&D, Value *&E,
+                         ICmpInst *LHS, ICmpInst *RHS,
+                         ICmpInst::Predicate &PredL,
+                         ICmpInst::Predicate &PredR) {
// Vectors are not (yet?) supported. Don't support pointers either.
if (!LHS->getOperand(0)->getType()->isIntegerTy() ||
!RHS->getOperand(0)->getType()->isIntegerTy())
- return 0;
+ return None;
// Here comes the tricky part:
// LHS might be of the form L11 & L12 == X, X == L21 & L22,
@@ -346,7 +350,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
// Bail if LHS was an icmp that can't be decomposed into an equality.
if (!ICmpInst::isEquality(PredL))
- return 0;
+ return None;
Value *R1 = RHS->getOperand(0);
Value *R2 = RHS->getOperand(1);
@@ -360,7 +364,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
A = R12;
D = R11;
} else {
- return 0;
+ return None;
}
E = R2;
R1 = nullptr;
@@ -388,7 +392,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
// Bail if RHS was an icmp that can't be decomposed into an equality.
if (!ICmpInst::isEquality(PredR))
- return 0;
+ return None;
// Look for ANDs on the right side of the RHS icmp.
if (!Ok) {
@@ -408,11 +412,11 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
E = R1;
Ok = true;
} else {
- return 0;
+ return None;
}
}
if (!Ok)
- return 0;
+ return None;
if (L11 == A) {
B = L12;
@@ -430,7 +434,174 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
unsigned LeftType = getMaskedICmpType(A, B, C, PredL);
unsigned RightType = getMaskedICmpType(A, D, E, PredR);
- return LeftType & RightType;
+ return Optional<std::pair<unsigned, unsigned>>(
+     std::make_pair(LeftType, RightType));
+}
+
+/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single
+/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
+/// and the right-hand side is of type BMask_Mixed. For example,
+/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
+static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
+ ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+ Value *A, Value *B, Value *C, Value *D, Value *E,
+ ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
+ llvm::InstCombiner::BuilderTy &Builder) {
+ // We are given the canonical form:
+ // (icmp ne (A & B), 0) & (icmp eq (A & D), E).
+ // where D & E == E.
+ //
+ // If IsAnd is false, we get it in negated form:
+ // (icmp eq (A & B), 0) | (icmp ne (A & D), E) ->
+ // !((icmp ne (A & B), 0) & (icmp eq (A & D), E)).
+ //
+ // We currently handle the case where B, C, D, and E are constants.
+ //
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (!BCst)
+ return nullptr;
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ if (!CCst)
+ return nullptr;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (!DCst)
+ return nullptr;
+ ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+ if (!ECst)
+ return nullptr;
+
+ ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+
+ // Update E to the canonical form when D is a power of two and RHS is
+ // canonicalized as:
+ // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
+ // (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
+ if (PredR != NewCC)
+ ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
+
+ // If B or D is zero, skip because LHS or RHS can be trivially folded by
+ // other folding rules and this pattern won't apply anymore.
+ if (BCst->getValue() == 0 || DCst->getValue() == 0)
+ return nullptr;
+
+ // If B and D don't intersect, i.e. (B & D) == 0, no folding because we can't
+ // deduce anything from it.
+ // For example,
+ // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding.
+ if ((BCst->getValue() & DCst->getValue()) == 0)
+ return nullptr;
+
+ // If the following two conditions are met:
+ //
+ // 1. mask B covers only a single bit that's not covered by mask D, that is,
+ // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of
+ // B and D has only one bit set) and,
+ //
+ // 2. RHS (and E) indicates that the rest of B's bits are zero (in other
+ // words, E has no bit set in the intersection of B and D), that is,
+ // ((B & D) & E) == 0
+ //
+ // then that single bit in B must be one and thus the whole expression can be
+ // folded to
+ // (A & (B | D)) == (B & (B ^ D)) | E.
+ //
+ // For example,
+ // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)
+ // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8)
+ if ((((BCst->getValue() & DCst->getValue()) & ECst->getValue()) == 0) &&
+ (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())).isPowerOf2()) {
+ APInt BorD = BCst->getValue() | DCst->getValue();
+ APInt BandBxorDorE = (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())) |
+ ECst->getValue();
+ Value *NewMask = ConstantInt::get(BCst->getType(), BorD);
+ Value *NewMaskedValue = ConstantInt::get(BCst->getType(), BandBxorDorE);
+ Value *NewAnd = Builder.CreateAnd(A, NewMask);
+ return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue);
+ }
+
+ auto IsSubSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
+ return (C1->getValue() & C2->getValue()) == C1->getValue();
+ };
+ auto IsSuperSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
+ return (C1->getValue() & C2->getValue()) == C2->getValue();
+ };
+
+ // In the following, we consider only the cases where B is a superset of D, B
+ // is a subset of D, or B == D because otherwise there's at least one bit
+ // covered by B but not D, in which case we can't deduce much from it, so
+ // no folding (aside from the single must-be-one bit case right above.)
+ // For example,
+ // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding.
+ if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst))
+ return nullptr;
+
+ // At this point, either B is a superset of D, B is a subset of D or B == D.
+
+ // If E is zero: if B is a subset of (or equal to) D, LHS and RHS contradict
+ // and the whole expression becomes false (or true if negated); otherwise, no
+ // folding.
+ // For example,
+ // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false.
+ // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding.
+ if (ECst->isZero()) {
+ if (IsSubSetOrEqual(BCst, DCst))
+ return ConstantInt::get(LHS->getType(), !IsAnd);
+ return nullptr;
+ }
+
+ // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B ==
+ // D. If B is a superset of (or equal to) D, since E is not zero, LHS is
+ // subsumed by RHS (RHS implies LHS.) So the whole expression becomes
+ // RHS. For example,
+ // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
+ // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
+ if (IsSuperSetOrEqual(BCst, DCst))
+ return RHS;
+ // Otherwise, B is a subset of D. If B and E have a common bit set,
+ // i.e. (B & E) != 0, then LHS is subsumed by RHS. For example,
+ // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
+ assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code");
+ if ((BCst->getValue() & ECst->getValue()) != 0)
+ return RHS;
+ // Otherwise, LHS and RHS contradict and the whole expression becomes false
+ // (or true if negated.) For example,
+ // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false.
+ // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false.
+ return ConstantInt::get(LHS->getType(), !IsAnd);
+}
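The three outcomes implemented above (single must-be-one bit, superset subsumption, and contradiction) can be sanity-checked exhaustively over all 8-bit values. A minimal standalone C++ sketch, separate from the patch itself:

#include <cassert>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    // Single must-be-one bit:
    // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)
    assert(((A & 12) != 0 && (A & 7) == 1) == ((A & 15) == 9));
    // B is a superset of D, so RHS subsumes LHS:
    // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8)
    assert(((A & 255) != 0 && (A & 15) == 8) == ((A & 15) == 8));
    // B is a subset of D and (B & E) == 0, so the two sides contradict:
    // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false
    assert(!((A & 7) != 0 && (A & 15) == 8));
  }
  return 0;
}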
+
+/// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single
+/// (icmp(A & X) ==/!= Y), where the left-hand side and the right-hand side
+/// aren't of the common mask pattern type.
+static Value *foldLogOpOfMaskedICmpsAsymmetric(
+ ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+ Value *A, Value *B, Value *C, Value *D, Value *E,
+ ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
+ unsigned LHSMask, unsigned RHSMask,
+ llvm::InstCombiner::BuilderTy &Builder) {
+ assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
+ "Expected equality predicates for masked type of icmps.");
+ // Handle Mask_NotAllZeros-BMask_Mixed cases.
+ // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or
+ // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E)
+ // which gets swapped to
+ // (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C).
+ if (!IsAnd) {
+ LHSMask = conjugateICmpMask(LHSMask);
+ RHSMask = conjugateICmpMask(RHSMask);
+ }
+ if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) {
+ if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
+ LHS, RHS, IsAnd, A, B, C, D, E,
+ PredL, PredR, Builder)) {
+ return V;
+ }
+ } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) {
+ if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
+ RHS, LHS, IsAnd, A, D, E, B, C,
+ PredR, PredL, Builder)) {
+ return V;
+ }
+ }
+ return nullptr;
}
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
@@ -439,13 +610,24 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy &Builder) {
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
- unsigned Mask =
+ Optional<std::pair<unsigned, unsigned>> MaskPair =
getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR);
- if (Mask == 0)
+ if (!MaskPair)
return nullptr;
-
assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
"Expected equality predicates for masked type of icmps.");
+ unsigned LHSMask = MaskPair->first;
+ unsigned RHSMask = MaskPair->second;
+ unsigned Mask = LHSMask & RHSMask;
+ if (Mask == 0) {
+ // Even if the two sides don't share a common pattern, check if folding can
+ // still happen.
+ if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
+ LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
+ Builder))
+ return V;
+ return nullptr;
+ }
// In full generality:
// (icmp (A & B) Op C) | (icmp (A & D) Op E)
@@ -939,8 +1121,8 @@ Value *InstCombiner::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd)
return nullptr;
// FCmp canonicalization ensures that (fcmp ord/uno X, X) and
- // (fcmp ord/uno X, C) will be transformed to (fcmp X, 0.0).
- if (match(LHS1, m_Zero()) && LHS1 == RHS1)
+ // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0).
+ if (match(LHS1, m_PosZeroFP()) && match(RHS1, m_PosZeroFP()))
// Ignore the constants because they are obviously not NANs:
// (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y)
// (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y)
@@ -1106,8 +1288,8 @@ static Instruction *foldAndToXor(BinaryOperator &I,
// Operand complexity canonicalization guarantees that the 'or' is Op0.
// (A | B) & ~(A & B) --> A ^ B
// (A | B) & ~(B & A) --> A ^ B
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B)))))
+ if (match(&I, m_BinOp(m_Or(m_Value(A), m_Value(B)),
+ m_Not(m_c_And(m_Deferred(A), m_Deferred(B))))))
return BinaryOperator::CreateXor(A, B);
// (A | ~B) & (~A | B) --> ~(A ^ B)
@@ -1115,8 +1297,8 @@ static Instruction *foldAndToXor(BinaryOperator &I,
// (~B | A) & (~A | B) --> ~(A ^ B)
// (~B | A) & (B | ~A) --> ~(A ^ B)
if (Op0->hasOneUse() || Op1->hasOneUse())
- if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))
+ if (match(&I, m_BinOp(m_c_Or(m_Value(A), m_Not(m_Value(B))),
+ m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B)))))
return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
return nullptr;
@@ -1148,18 +1330,86 @@ static Instruction *foldOrToXor(BinaryOperator &I,
return nullptr;
}
+/// Return true if a constant shift amount is always less than the specified
+/// bit-width. If not, the shift could create poison in the narrower type.
+static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) {
+ if (auto *ScalarC = dyn_cast<ConstantInt>(C))
+ return ScalarC->getZExtValue() < BitWidth;
+
+ if (C->getType()->isVectorTy()) {
+ // Check each element of a constant vector.
+ unsigned NumElts = C->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = C->getAggregateElement(i);
+ if (!Elt)
+ return false;
+ if (isa<UndefValue>(Elt))
+ continue;
+ auto *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI || CI->getZExtValue() >= BitWidth)
+ return false;
+ }
+ return true;
+ }
+
+ // The constant is a constant expression or unknown.
+ return false;
+}
+
+/// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and
+/// a common zext operand: and (binop (zext X), C), (zext X).
+Instruction *InstCombiner::narrowMaskedBinOp(BinaryOperator &And) {
+ // This transform could also apply to {or, and, xor}, but there are better
+ // folds for those cases, so we don't expect those patterns here. AShr is not
+ // handled because it should always be transformed to LShr in this sequence.
+ // The subtract transform is different because it has a constant on the left.
+ // Add/mul commute the constant to RHS; sub with constant RHS becomes add.
+ Value *Op0 = And.getOperand(0), *Op1 = And.getOperand(1);
+ Constant *C;
+ if (!match(Op0, m_OneUse(m_Add(m_Specific(Op1), m_Constant(C)))) &&
+ !match(Op0, m_OneUse(m_Mul(m_Specific(Op1), m_Constant(C)))) &&
+ !match(Op0, m_OneUse(m_LShr(m_Specific(Op1), m_Constant(C)))) &&
+ !match(Op0, m_OneUse(m_Shl(m_Specific(Op1), m_Constant(C)))) &&
+ !match(Op0, m_OneUse(m_Sub(m_Constant(C), m_Specific(Op1)))))
+ return nullptr;
+
+ Value *X;
+ if (!match(Op1, m_ZExt(m_Value(X))) || Op1->hasNUsesOrMore(3))
+ return nullptr;
+
+ Type *Ty = And.getType();
+ if (!isa<VectorType>(Ty) && !shouldChangeType(Ty, X->getType()))
+ return nullptr;
+
+ // If we're narrowing a shift, the shift amount must be safe (less than the
+ // width) in the narrower type. If the shift amount is greater, instsimplify
+ // usually handles that case, but we can't guarantee/assert it.
+ Instruction::BinaryOps Opc = cast<BinaryOperator>(Op0)->getOpcode();
+ if (Opc == Instruction::LShr || Opc == Instruction::Shl)
+ if (!canNarrowShiftAmt(C, X->getType()->getScalarSizeInBits()))
+ return nullptr;
+
+ // and (sub C, (zext X)), (zext X) --> zext (and (sub C', X), X)
+ // and (binop (zext X), C), (zext X) --> zext (and (binop X, C'), X)
+ Value *NewC = ConstantExpr::getTrunc(C, X->getType());
+ Value *NewBO = Opc == Instruction::Sub ? Builder.CreateBinOp(Opc, NewC, X)
+ : Builder.CreateBinOp(Opc, X, NewC);
+ return new ZExtInst(Builder.CreateAnd(NewBO, X), Ty);
+}
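The narrowing is sound because the outer 'and' with (zext X) clears every bit above the narrow width, so only the low bits of the inner binop survive. A standalone C++ sketch (not part of the patch) checking the i8-inside-i32 case exhaustively, with an arbitrary constant chosen for illustration:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 0x12345678u;               // arbitrary wide constant
  const uint8_t NewC = static_cast<uint8_t>(C); // trunc C to the narrow type
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    uint32_t ZX = X; // zext i8 X to i32
    // and (add (zext X), C), (zext X) == zext (and (add X, C'), X)
    assert(((ZX + C) & ZX) == uint32_t(uint8_t(uint8_t(X + NewC) & X)));
    // and (sub C, (zext X)), (zext X) == zext (and (sub C', X), X)
    assert(((C - ZX) & ZX) == uint32_t(uint8_t(uint8_t(NewC - X) & X)));
    // and (shl (zext X), 3), (zext X) == zext (and (shl X, 3), X); 3 < 8,
    // which is exactly what canNarrowShiftAmt guards for.
    assert(((ZX << 3) & ZX) == uint32_t(uint8_t(uint8_t(X << 3) & X)));
  }
  return 0;
}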
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Value *V = SimplifyVectorOp(I))
+ if (Value *V = SimplifyAndInst(I.getOperand(0), I.getOperand(1),
+ SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyAndInst(Op0, Op1, SQ.getWithInstruction(&I)))
- return replaceInstUsesWith(I, V);
+ if (SimplifyAssociativeOrCommutative(I))
+ return &I;
+
+ if (Instruction *X = foldShuffledBinop(I))
+ return X;
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
@@ -1177,6 +1427,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
const APInt *C;
if (match(Op1, m_APInt(C))) {
Value *X, *Y;
@@ -1289,9 +1540,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
}
- if (isa<Constant>(Op1))
- if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
- return FoldedLogic;
+ if (Instruction *Z = narrowMaskedBinOp(I))
+ return Z;
+
+ if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
+ return FoldedLogic;
if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
return DeMorgan;
@@ -1397,7 +1650,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
A->getType()->isIntOrIntVectorTy(1))
return SelectInst::Create(A, Op0, Constant::getNullValue(I.getType()));
- return Changed ? &I : nullptr;
+ return nullptr;
}
/// Given an OR instruction, check to see if this is a bswap idiom. If so,
@@ -1424,7 +1677,18 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
match(Op1, m_And(m_Value(), m_Value()));
- if (!OrOfOrs && !OrOfShifts && !OrOfAnds)
+ // (A << B) | (C & D) -> bswap if possible.
+ // The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a
+ // part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935,
+ // C2 = 8 for i32).
+ // This pattern can occur when the operands of the 'or' are not canonicalized
+ // for some reason (not having only one use, for example).
+ bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_And(m_Value(), m_Value()))) ||
+ (match(Op0, m_And(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value())));
+
+ if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh)
return nullptr;
SmallVector<Instruction*, 4> Insts;
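For reference, the idiom the comment describes composes a full i32 byte swap out of exactly such or-of-shift-and-and pieces. A small standalone C++ illustration (not part of the patch):

#include <cassert>
#include <cstdint>

static uint32_t bswapRef(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0xFF00u) | ((X << 8) & 0xFF0000u) | (X << 24);
}

int main() {
  uint32_t X = 1u;
  for (int I = 0; I < 1000000; ++I) {
    X = X * 1664525u + 1013904223u; // simple LCG to vary the input
    // ((A & C1) << C2) | ((B >> C2) & C1) with C1 = 16711935 (0x00FF00FF)
    // and C2 = 8 swaps adjacent bytes...
    uint32_t T = ((X & 0x00FF00FFu) << 8) | ((X >> 8) & 0x00FF00FFu);
    // ...and rotating the halves by 16 completes the byte swap.
    assert(((T << 16) | (T >> 16)) == bswapRef(X));
  }
  return 0;
}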
@@ -1448,7 +1712,6 @@ static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
return false;
// One element must be all ones, and the other must be all zeros.
- // FIXME: Allow undef elements.
if (!((match(EltC1, m_Zero()) && match(EltC2, m_AllOnes())) ||
(match(EltC2, m_Zero()) && match(EltC1, m_AllOnes()))))
return false;
@@ -1755,14 +2018,15 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombiner::visitOr(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Value *V = SimplifyVectorOp(I))
+ if (Value *V = SimplifyOrInst(I.getOperand(0), I.getOperand(1),
+ SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyOrInst(Op0, Op1, SQ.getWithInstruction(&I)))
- return replaceInstUsesWith(I, V);
+ if (SimplifyAssociativeOrCommutative(I))
+ return &I;
+
+ if (Instruction *X = foldShuffledBinop(I))
+ return X;
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
@@ -1780,14 +2044,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
- if (isa<Constant>(Op1))
- if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
- return FoldedLogic;
+ if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
+ return FoldedLogic;
// Given an OR instruction, check to see if this is a bswap.
if (Instruction *BSwap = MatchBSwap(I))
return BSwap;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
{
Value *A;
const APInt *C;
@@ -2027,7 +2291,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
- return Changed ? &I : nullptr;
+ return nullptr;
}
/// A ^ B can be specified using other logic ops in a variety of patterns. We
@@ -2045,10 +2309,8 @@ static Instruction *foldXorToXor(BinaryOperator &I,
// (A & B) ^ (B | A) -> A ^ B
// (A | B) ^ (A & B) -> A ^ B
// (A | B) ^ (B & A) -> A ^ B
- if ((match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) ||
- (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_c_And(m_Specific(A), m_Specific(B))))) {
+ if (match(&I, m_c_Xor(m_And(m_Value(A), m_Value(B)),
+ m_c_Or(m_Deferred(A), m_Deferred(B))))) {
I.setOperand(0, A);
I.setOperand(1, B);
return &I;
@@ -2058,10 +2320,8 @@ static Instruction *foldXorToXor(BinaryOperator &I,
// (~B | A) ^ (~A | B) -> A ^ B
// (~A | B) ^ (A | ~B) -> A ^ B
// (B | ~A) ^ (A | ~B) -> A ^ B
- if ((match(Op0, m_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) ||
- (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_c_Or(m_Specific(A), m_Not(m_Specific(B)))))) {
+ if (match(&I, m_Xor(m_c_Or(m_Value(A), m_Not(m_Value(B))),
+ m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B))))) {
I.setOperand(0, A);
I.setOperand(1, B);
return &I;
@@ -2071,10 +2331,8 @@ static Instruction *foldXorToXor(BinaryOperator &I,
// (~B & A) ^ (~A & B) -> A ^ B
// (~A & B) ^ (A & ~B) -> A ^ B
// (B & ~A) ^ (A & ~B) -> A ^ B
- if ((match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))) ||
- (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))))) {
+ if (match(&I, m_Xor(m_c_And(m_Value(A), m_Not(m_Value(B))),
+ m_c_And(m_Not(m_Deferred(A)), m_Deferred(B))))) {
I.setOperand(0, A);
I.setOperand(1, B);
return &I;
@@ -2113,6 +2371,34 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
+ // TODO: This can be generalized to compares of non-signbits using
+ // decomposeBitTestICmp(). It could be enhanced more by using (something like)
+ // foldLogOpOfMaskedICmps().
+ ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
+ Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
+ Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
+ if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
+ LHS0->getType() == RHS0->getType()) {
+ // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
+ // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
+ if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
+ PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) ||
+ (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
+ PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero()))) {
+ Value *Zero = ConstantInt::getNullValue(LHS0->getType());
+ return Builder.CreateICmpSLT(Builder.CreateXor(LHS0, RHS0), Zero);
+ }
+ // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
+ // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
+ if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
+ PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) ||
+ (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
+ PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes()))) {
+ Value *MinusOne = ConstantInt::getAllOnesValue(LHS0->getType());
+ return Builder.CreateICmpSGT(Builder.CreateXor(LHS0, RHS0), MinusOne);
+ }
+ }
+
// Instead of trying to imitate the folds for and/or, decompose this 'xor'
// into those logic ops. That is, try to turn this into an and-of-icmps
// because we have many folds for that pattern.
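The sign-bit folds added above rest on the fact that X ^ Y is negative exactly when X and Y have different signs. An exhaustive 8-bit check as a standalone C++ sketch (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int XI = -128; XI < 128; ++XI) {
    for (int YI = -128; YI < 128; ++YI) {
      int8_t X = static_cast<int8_t>(XI), Y = static_cast<int8_t>(YI);
      // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
      assert(((X > -1) ^ (Y > -1)) == (static_cast<int8_t>(X ^ Y) < 0));
      // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
      assert(((X > -1) ^ (Y < 0)) == (static_cast<int8_t>(X ^ Y) > -1));
    }
  }
  return 0;
}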
@@ -2140,18 +2426,63 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
return nullptr;
}
+/// If we have a masked merge, in the canonical form of:
+/// (assuming that A only has one use.)
+///   |        A        |  |B|
+///   ((x ^ y) & M) ^ y
+///    |  D  |
+/// * If M is inverted:
+///      |  D  |
+///     ((x ^ y) & ~M) ^ y
+///   We can canonicalize by swapping the final xor operand
+///   to eliminate the 'not' of the mask.
+///   ((x ^ y) & M) ^ x
+/// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops
+///   because that shortens the dependency chain and improves analysis:
+///     (x & M) | (y & ~M)
+static Instruction *visitMaskedMerge(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *B, *X, *D;
+ Value *M;
+ if (!match(&I, m_c_Xor(m_Value(B),
+ m_OneUse(m_c_And(
+ m_CombineAnd(m_c_Xor(m_Deferred(B), m_Value(X)),
+ m_Value(D)),
+ m_Value(M))))))
+ return nullptr;
+
+ Value *NotM;
+ if (match(M, m_Not(m_Value(NotM)))) {
+ // De-invert the mask and swap the value in the B part.
+ Value *NewA = Builder.CreateAnd(D, NotM);
+ return BinaryOperator::CreateXor(NewA, X);
+ }
+
+ Constant *C;
+ if (D->hasOneUse() && match(M, m_Constant(C))) {
+ // Unfold.
+ Value *LHS = Builder.CreateAnd(X, C);
+ Value *NotC = Builder.CreateNot(C);
+ Value *RHS = Builder.CreateAnd(B, NotC);
+ return BinaryOperator::CreateOr(LHS, RHS);
+ }
+
+ return nullptr;
+}
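Both rewrites implemented above preserve the masked-merge semantics: select x's bits where M is set and y's bits elsewhere. A standalone exhaustive C++ check over 8-bit values (not part of the patch):

#include <cassert>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      for (unsigned M = 0; M < 256; ++M) {
        unsigned NotM = ~M & 0xFF;
        // Constant-mask unfold: ((x ^ y) & M) ^ y == (x & M) | (y & ~M)
        assert((((X ^ Y) & M) ^ Y) == ((X & M) | (Y & NotM)));
        // Inverted mask: ((x ^ y) & ~M) ^ y == ((x ^ y) & M) ^ x
        assert((((X ^ Y) & NotM) ^ Y) == (((X ^ Y) & M) ^ X));
      }
  return 0;
}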
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Value *V = SimplifyVectorOp(I))
+ if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
+ SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I)))
- return replaceInstUsesWith(I, V);
+ if (SimplifyAssociativeOrCommutative(I))
+ return &I;
+
+ if (Instruction *X = foldShuffledBinop(I))
+ return X;
if (Instruction *NewXor = foldXorToXor(I, Builder))
return NewXor;
@@ -2168,6 +2499,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
+ // A^B --> A|B iff A and B have no bits set in common.
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (haveNoCommonBitsSet(Op0, Op1, DL, &AC, &I, &DT))
+ return BinaryOperator::CreateOr(Op0, Op1);
+
// Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
Value *X, *Y;
@@ -2186,6 +2522,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return BinaryOperator::CreateAnd(X, NotY);
}
+ if (Instruction *Xor = visitMaskedMerge(I, Builder))
+ return Xor;
+
// Is this a 'not' (~) fed by a binary operator?
BinaryOperator *NotVal;
if (match(&I, m_Not(m_BinOp(NotVal)))) {
@@ -2206,6 +2545,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
+ // ~(X - Y) --> ~X + Y
+ if (match(NotVal, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))))
+ return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
+
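The new ~(X - Y) --> ~X + Y fold is the two's complement identity ~A == -A - 1 applied to A = X - Y. A standalone 8-bit exhaustive check (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      // ~(X - Y) --> ~X + Y (wrapping 8-bit arithmetic)
      assert(static_cast<uint8_t>(~(X - Y)) == static_cast<uint8_t>(~X + Y));
  return 0;
}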
// ~(~X >>s Y) --> (X >>s Y)
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
return BinaryOperator::CreateAShr(X, Y);
@@ -2214,16 +2557,18 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// the 'not' by inverting the constant and using the opposite shift type.
// Canonicalization rules ensure that only a negative constant uses 'ashr',
// but we must check that in case that transform has not fired yet.
- const APInt *C;
- if (match(NotVal, m_AShr(m_APInt(C), m_Value(Y))) && C->isNegative()) {
+ Constant *C;
+ if (match(NotVal, m_AShr(m_Constant(C), m_Value(Y))) &&
+ match(C, m_Negative())) {
// ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
- Constant *NotC = ConstantInt::get(I.getType(), ~(*C));
+ Constant *NotC = ConstantExpr::getNot(C);
return BinaryOperator::CreateLShr(NotC, Y);
}
- if (match(NotVal, m_LShr(m_APInt(C), m_Value(Y))) && C->isNonNegative()) {
+ if (match(NotVal, m_LShr(m_Constant(C), m_Value(Y))) &&
+ match(C, m_NonNegative())) {
// ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
- Constant *NotC = ConstantInt::get(I.getType(), ~(*C));
+ Constant *NotC = ConstantExpr::getNot(C);
return BinaryOperator::CreateAShr(NotC, Y);
}
}
@@ -2305,9 +2650,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- if (isa<Constant>(Op1))
- if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
- return FoldedLogic;
+ if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
+ return FoldedLogic;
{
Value *A, *B;
@@ -2397,25 +2741,59 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
return CastedXor;
- // Canonicalize the shifty way to code absolute value to the common pattern.
+ // Canonicalize a shifty way to code absolute value to the common pattern.
// There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
// We're relying on the fact that we only do this transform when the shift has
// exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
// instructions).
- if (Op0->getNumUses() == 2)
+ if (Op0->hasNUses(2))
std::swap(Op0, Op1);
const APInt *ShAmt;
Type *Ty = I.getType();
if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
- Op1->getNumUses() == 2 && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
+ Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
// B = ashr i32 A, 31 ; smear the sign bit
// xor (add A, B), B ; add -1 and flip bits if negative
// --> (A < 0) ? -A : A
Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
- return SelectInst::Create(Cmp, Builder.CreateNeg(A), A);
+ // Copy the nuw/nsw flags from the add to the negate.
+ auto *Add = cast<BinaryOperator>(Op0);
+ Value *Neg = Builder.CreateNeg(A, "", Add->hasNoUnsignedWrap(),
+ Add->hasNoSignedWrap());
+ return SelectInst::Create(Cmp, Neg, A);
+ }
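The 'shifty' absolute value being canonicalized here smears the sign bit with an arithmetic shift, then uses the add and xor to conditionally negate. A standalone C++ sketch of the equivalence (not part of the patch), staying away from INT_MIN where negation would overflow:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t A = -1000; A <= 1000; ++A) {
    int32_t B = A >> 31;          // smear the sign bit: 0 or -1
    int32_t Shifty = (A + B) ^ B; // add -1 and flip bits if negative
    assert(Shifty == (A < 0 ? -A : A));
  }
  return 0;
}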
+
+ // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
+ //
+ // %notx = xor i32 %x, -1
+ // %cmp1 = icmp sgt i32 %notx, %y
+ // %smax = select i1 %cmp1, i32 %notx, i32 %y
+ // %res = xor i32 %smax, -1
+ // =>
+ // %noty = xor i32 %y, -1
+ // %cmp2 = icmp slt %x, %noty
+ // %res = select i1 %cmp2, i32 %x, i32 %noty
+ //
+ // Same is applicable for smin/umax/umin.
+ {
+ Value *LHS, *RHS;
+ SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor;
+ if (Op0->hasOneUse() && SelectPatternResult::isMinOrMax(SPF) &&
+ match(Op1, m_AllOnes())) {
+
+ Value *X;
+ if (match(RHS, m_Not(m_Value(X))))
+ std::swap(RHS, LHS);
+
+ if (match(LHS, m_Not(m_Value(X)))) {
+ Value *NotY = Builder.CreateNot(RHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
+ }
+ }
}
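The inversion relies on ~max(a, b) == min(~a, ~b) (and symmetrically for min); substituting a = ~x yields the rewrite above. A standalone exhaustive 8-bit check (not part of the patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int XI = -128; XI < 128; ++XI)
    for (int YI = -128; YI < 128; ++YI) {
      int8_t X = static_cast<int8_t>(XI), Y = static_cast<int8_t>(YI);
      int8_t NotX = ~X, NotY = ~Y;
      // ~smax(~x, y) == smin(x, ~y)
      assert(static_cast<int8_t>(~std::max(NotX, Y)) == std::min(X, NotY));
      // ~smin(~x, y) == smax(x, ~y)
      assert(static_cast<int8_t>(~std::min(NotX, Y)) == std::max(X, NotY));
    }
  return 0;
}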
- return Changed ? &I : nullptr;
+ return nullptr;
}