Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineAndOrXor.cpp')
-rw-r--r--   lib/Transforms/InstCombine/InstCombineAndOrXor.cpp   534
1 file changed, 456 insertions(+), 78 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2364202e5b69..372bc41f780e 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -14,10 +14,10 @@
 #include "InstCombineInternal.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -75,7 +75,7 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
   return Builder.CreateFCmp(Pred, LHS, RHS);
 }
 
-/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
+/// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
 /// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B))
 /// \param I Binary operator to transform.
 /// \return Pointer to node that must replace the original binary operator, or
@@ -305,17 +305,21 @@ static bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pre
 }
 
 /// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
-/// Return the set of pattern classes (from MaskedICmpType) that both LHS and
-/// RHS satisfy.
-static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
-                                         Value *&D, Value *&E, ICmpInst *LHS,
-                                         ICmpInst *RHS,
-                                         ICmpInst::Predicate &PredL,
-                                         ICmpInst::Predicate &PredR) {
+/// Return the pattern classes (from MaskedICmpType) for the left hand side and
+/// the right hand side as a pair.
+/// LHS and RHS are the left hand side and the right hand side ICmps and PredL
+/// and PredR are their predicates, respectively.
+static
+Optional<std::pair<unsigned, unsigned>>
+getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
+                         Value *&D, Value *&E, ICmpInst *LHS,
+                         ICmpInst *RHS,
+                         ICmpInst::Predicate &PredL,
+                         ICmpInst::Predicate &PredR) {
   // vectors are not (yet?) supported. Don't support pointers either.
   if (!LHS->getOperand(0)->getType()->isIntegerTy() ||
       !RHS->getOperand(0)->getType()->isIntegerTy())
-    return 0;
+    return None;
 
   // Here comes the tricky part:
   // LHS might be of the form L11 & L12 == X, X == L21 & L22,
@@ -346,7 +350,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
 
   // Bail if LHS was an icmp that can't be decomposed into an equality.
   if (!ICmpInst::isEquality(PredL))
-    return 0;
+    return None;
 
   Value *R1 = RHS->getOperand(0);
   Value *R2 = RHS->getOperand(1);
@@ -360,7 +364,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
       A = R12;
       D = R11;
     } else {
-      return 0;
+      return None;
     }
     E = R2;
     R1 = nullptr;
@@ -388,7 +392,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
 
   // Bail if RHS was an icmp that can't be decomposed into an equality.
   if (!ICmpInst::isEquality(PredR))
-    return 0;
+    return None;
 
   // Look for ANDs on the right side of the RHS icmp.
   if (!Ok) {
@@ -408,11 +412,11 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
       E = R1;
       Ok = true;
     } else {
-      return 0;
+      return None;
     }
   }
   if (!Ok)
-    return 0;
+    return None;
 
   if (L11 == A) {
     B = L12;
@@ -430,7 +434,174 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
 
   unsigned LeftType = getMaskedICmpType(A, B, C, PredL);
   unsigned RightType = getMaskedICmpType(A, D, E, PredR);
-  return LeftType & RightType;
+  return Optional<std::pair<unsigned, unsigned>>(
+      std::make_pair(LeftType, RightType));
+}
+
+/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single
+/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
+/// and the right hand side is of type BMask_Mixed. For example,
+/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
+static Value * foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
+    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+    Value *A, Value *B, Value *C, Value *D, Value *E,
+    ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
+    llvm::InstCombiner::BuilderTy &Builder) {
+  // We are given the canonical form:
+  //   (icmp ne (A & B), 0) & (icmp eq (A & D), E).
+  // where D & E == E.
+  //
+  // If IsAnd is false, we get it in negated form:
+  //   (icmp eq (A & B), 0) | (icmp ne (A & D), E) ->
+  //     !((icmp ne (A & B), 0) & (icmp eq (A & D), E)).
+  //
+  // We currently handle the case where B, C, D, and E are constants.
+  //
+  ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+  if (!BCst)
+    return nullptr;
+  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+  if (!CCst)
+    return nullptr;
+  ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+  if (!DCst)
+    return nullptr;
+  ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+  if (!ECst)
+    return nullptr;
+
+  ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+
+  // Update E to the canonical form when D is a power of two and RHS is
+  // canonicalized as,
+  // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
+  // (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
+  if (PredR != NewCC)
+    ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
+
+  // If B or D is zero, skip because LHS or RHS can be trivially folded by
+  // other folding rules and this pattern won't apply any more.
+  if (BCst->getValue() == 0 || DCst->getValue() == 0)
+    return nullptr;
+
+  // If B and D don't intersect, i.e. (B & D) == 0, no folding because we can't
+  // deduce anything from it.
+  // For example,
+  // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding.
+  if ((BCst->getValue() & DCst->getValue()) == 0)
+    return nullptr;
+
+  // If the following two conditions are met:
+  //
+  // 1. mask B covers only a single bit that's not covered by mask D, that is,
+  // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of
+  // B and D has only one bit set) and,
+  //
+  // 2. RHS (and E) indicates that the rest of B's bits are zero (in other
+  // words, E has no bits set in the intersection of B and D), that is,
+  // ((B & D) & E) == 0
+  //
+  // then that single bit in B must be one and thus the whole expression can be
+  // folded to
+  //   (A & (B | D)) == (B & (B ^ D)) | E.
+  //
+  // For example,
+  // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)
+  // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8)
+  if ((((BCst->getValue() & DCst->getValue()) & ECst->getValue()) == 0) &&
+      (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())).isPowerOf2()) {
+    APInt BorD = BCst->getValue() | DCst->getValue();
+    APInt BandBxorDorE =
+        (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())) |
+        ECst->getValue();
+    Value *NewMask = ConstantInt::get(BCst->getType(), BorD);
+    Value *NewMaskedValue = ConstantInt::get(BCst->getType(), BandBxorDorE);
+    Value *NewAnd = Builder.CreateAnd(A, NewMask);
+    return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue);
+  }
+
+  auto IsSubSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
+    return (C1->getValue() & C2->getValue()) == C1->getValue();
+  };
+  auto IsSuperSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
+    return (C1->getValue() & C2->getValue()) == C2->getValue();
+  };
+
+  // In the following, we consider only the cases where B is a superset of D, B
+  // is a subset of D, or B == D because otherwise there's at least one bit
+  // covered by B but not D, in which case we can't deduce much from it, so
+  // no folding (aside from the single must-be-one bit case right above.)
+  // For example,
+  // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding.
+  if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst))
+    return nullptr;
+
+  // At this point, either B is a superset of D, B is a subset of D or B == D.
+
+  // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict
+  // and the whole expression becomes false (or true if negated), otherwise, no
+  // folding.
+  // For example,
+  // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false.
+  // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding.
+  if (ECst->isZero()) {
+    if (IsSubSetOrEqual(BCst, DCst))
+      return ConstantInt::get(LHS->getType(), !IsAnd);
+    return nullptr;
+  }
+
+  // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B ==
+  // D. If B is a superset of (or equal to) D, since E is not zero, LHS is
+  // subsumed by RHS (RHS implies LHS.) So the whole expression becomes
+  // RHS. For example,
+  // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
+  // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
+  if (IsSuperSetOrEqual(BCst, DCst))
+    return RHS;
+  // Otherwise, B is a subset of D. If B and E have a common bit set,
+  // i.e. (B & E) != 0, then LHS is subsumed by RHS. For example,
+  // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
+  assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code");
+  if ((BCst->getValue() & ECst->getValue()) != 0)
+    return RHS;
+  // Otherwise, LHS and RHS contradict and the whole expression becomes false
+  // (or true if negated.) For example,
+  // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false.
+  // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false.
+  return ConstantInt::get(LHS->getType(), !IsAnd);
+}
+
+/// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single
+/// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side
+/// aren't of the common mask pattern type.
+static Value *foldLogOpOfMaskedICmpsAsymmetric(
+    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+    Value *A, Value *B, Value *C, Value *D, Value *E,
+    ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
+    unsigned LHSMask, unsigned RHSMask,
+    llvm::InstCombiner::BuilderTy &Builder) {
+  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
+         "Expected equality predicates for masked type of icmps.");
+  // Handle Mask_NotAllZeros-BMask_Mixed cases.
+  // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or
+  // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E)
+  // which gets swapped to
+  // (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C).
+  if (!IsAnd) {
+    LHSMask = conjugateICmpMask(LHSMask);
+    RHSMask = conjugateICmpMask(RHSMask);
+  }
+  if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) {
+    if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
+            LHS, RHS, IsAnd, A, B, C, D, E,
+            PredL, PredR, Builder)) {
+      return V;
+    }
+  } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) {
+    if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
+            RHS, LHS, IsAnd, A, D, E, B, C,
+            PredR, PredL, Builder)) {
+      return V;
+    }
+  }
+  return nullptr;
 }
 
 /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
@@ -439,13 +610,24 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
                                      llvm::InstCombiner::BuilderTy &Builder) {
   Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
   ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
-  unsigned Mask =
+  Optional<std::pair<unsigned, unsigned>> MaskPair =
       getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR);
-  if (Mask == 0)
+  if (!MaskPair)
     return nullptr;
-
   assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
          "Expected equality predicates for masked type of icmps.");
+  unsigned LHSMask = MaskPair->first;
+  unsigned RHSMask = MaskPair->second;
+  unsigned Mask = LHSMask & RHSMask;
+  if (Mask == 0) {
+    // Even if the two sides don't share a common pattern, check if folding can
+    // still happen.
+    if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
+            LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
+            Builder))
+      return V;
+    return nullptr;
+  }
 
   // In full generality:
   //   (icmp (A & B) Op C) | (icmp (A & D) Op E)
@@ -939,8 +1121,8 @@ Value *InstCombiner::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd)
     return nullptr;
 
   // FCmp canonicalization ensures that (fcmp ord/uno X, X) and
-  // (fcmp ord/uno X, C) will be transformed to (fcmp X, 0.0).
-  if (match(LHS1, m_Zero()) && LHS1 == RHS1)
+  // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0).
+  if (match(LHS1, m_PosZeroFP()) && match(RHS1, m_PosZeroFP()))
     // Ignore the constants because they are obviously not NANs:
     // (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y)
     // (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y)
@@ -1106,8 +1288,8 @@ static Instruction *foldAndToXor(BinaryOperator &I,
   // Operand complexity canonicalization guarantees that the 'or' is Op0.
   // (A | B) & ~(A & B) --> A ^ B
   // (A | B) & ~(B & A) --> A ^ B
-  if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
-      match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B)))))
+  if (match(&I, m_BinOp(m_Or(m_Value(A), m_Value(B)),
+                        m_Not(m_c_And(m_Deferred(A), m_Deferred(B))))))
     return BinaryOperator::CreateXor(A, B);
 
   // (A | ~B) & (~A | B) --> ~(A ^ B)
@@ -1115,8 +1297,8 @@ static Instruction *foldAndToXor(BinaryOperator &I,
   // (~B | A) & (~A | B) --> ~(A ^ B)
   // (~B | A) & (B | ~A) --> ~(A ^ B)
   if (Op0->hasOneUse() || Op1->hasOneUse())
-    if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
-        match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))
+    if (match(&I, m_BinOp(m_c_Or(m_Value(A), m_Not(m_Value(B))),
+                          m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B)))))
       return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
 
   return nullptr;
@@ -1148,18 +1330,86 @@ static Instruction *foldOrToXor(BinaryOperator &I,
   return nullptr;
 }
 
+/// Return true if a constant shift amount is always less than the specified
+/// bit-width. If not, the shift could create poison in the narrower type.
+static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) {
+  if (auto *ScalarC = dyn_cast<ConstantInt>(C))
+    return ScalarC->getZExtValue() < BitWidth;
+
+  if (C->getType()->isVectorTy()) {
+    // Check each element of a constant vector.
+    unsigned NumElts = C->getType()->getVectorNumElements();
+    for (unsigned i = 0; i != NumElts; ++i) {
+      Constant *Elt = C->getAggregateElement(i);
+      if (!Elt)
+        return false;
+      if (isa<UndefValue>(Elt))
+        continue;
+      auto *CI = dyn_cast<ConstantInt>(Elt);
+      if (!CI || CI->getZExtValue() >= BitWidth)
+        return false;
+    }
+    return true;
+  }
+
+  // The constant is a constant expression or unknown.
+  return false;
+}
+
+/// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and
+/// a common zext operand: and (binop (zext X), C), (zext X).
+Instruction *InstCombiner::narrowMaskedBinOp(BinaryOperator &And) {
+  // This transform could also apply to {or, and, xor}, but there are better
+  // folds for those cases, so we don't expect those patterns here. AShr is not
+  // handled because it should always be transformed to LShr in this sequence.
+  // The subtract transform is different because it has a constant on the left.
+  // Add/mul commute the constant to RHS; sub with constant RHS becomes add.
+  Value *Op0 = And.getOperand(0), *Op1 = And.getOperand(1);
+  Constant *C;
+  if (!match(Op0, m_OneUse(m_Add(m_Specific(Op1), m_Constant(C)))) &&
+      !match(Op0, m_OneUse(m_Mul(m_Specific(Op1), m_Constant(C)))) &&
+      !match(Op0, m_OneUse(m_LShr(m_Specific(Op1), m_Constant(C)))) &&
+      !match(Op0, m_OneUse(m_Shl(m_Specific(Op1), m_Constant(C)))) &&
+      !match(Op0, m_OneUse(m_Sub(m_Constant(C), m_Specific(Op1)))))
+    return nullptr;
+
+  Value *X;
+  if (!match(Op1, m_ZExt(m_Value(X))) || Op1->hasNUsesOrMore(3))
+    return nullptr;
+
+  Type *Ty = And.getType();
+  if (!isa<VectorType>(Ty) && !shouldChangeType(Ty, X->getType()))
+    return nullptr;
+
+  // If we're narrowing a shift, the shift amount must be safe (less than the
+  // width) in the narrower type. If the shift amount is greater, instsimplify
+  // usually handles that case, but we can't guarantee/assert it.
+  Instruction::BinaryOps Opc = cast<BinaryOperator>(Op0)->getOpcode();
+  if (Opc == Instruction::LShr || Opc == Instruction::Shl)
+    if (!canNarrowShiftAmt(C, X->getType()->getScalarSizeInBits()))
+      return nullptr;
+
+  // and (sub C, (zext X)), (zext X) --> zext (and (sub C', X), X)
+  // and (binop (zext X), C), (zext X) --> zext (and (binop X, C'), X)
+  Value *NewC = ConstantExpr::getTrunc(C, X->getType());
+  Value *NewBO = Opc == Instruction::Sub ? Builder.CreateBinOp(Opc, NewC, X)
+                                         : Builder.CreateBinOp(Opc, X, NewC);
+  return new ZExtInst(Builder.CreateAnd(NewBO, X), Ty);
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
 Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
-  bool Changed = SimplifyAssociativeOrCommutative(I);
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  if (Value *V = SimplifyVectorOp(I))
+  if (Value *V = SimplifyAndInst(I.getOperand(0), I.getOperand(1),
+                                 SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyAndInst(Op0, Op1, SQ.getWithInstruction(&I)))
-    return replaceInstUsesWith(I, V);
+  if (SimplifyAssociativeOrCommutative(I))
+    return &I;
+
+  if (Instruction *X = foldShuffledBinop(I))
+    return X;
 
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
@@ -1177,6 +1427,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   if (Value *V = SimplifyBSwap(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   const APInt *C;
   if (match(Op1, m_APInt(C))) {
     Value *X, *Y;
@@ -1289,9 +1540,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     }
   }
 
-  if (isa<Constant>(Op1))
-    if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
-      return FoldedLogic;
+  if (Instruction *Z = narrowMaskedBinOp(I))
+    return Z;
+
+  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
+    return FoldedLogic;
 
   if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
     return DeMorgan;
@@ -1397,7 +1650,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       A->getType()->isIntOrIntVectorTy(1))
     return SelectInst::Create(A, Op0, Constant::getNullValue(I.getType()));
 
-  return Changed ? &I : nullptr;
+  return nullptr;
 }
 
 /// Given an OR instruction, check to see if this is a bswap idiom. If so,
@@ -1424,7 +1677,18 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
   bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
                   match(Op1, m_And(m_Value(), m_Value()));
 
-  if (!OrOfOrs && !OrOfShifts && !OrOfAnds)
+  // (A << B) | (C & D) -> bswap if possible.
+  // The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a
+  // part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935,
+  // C2 = 8 for i32).
+  // This pattern can occur when the operands of the 'or' are not canonicalized
+  // for some reason (not having only one use, for example).
+  bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+                       match(Op1, m_And(m_Value(), m_Value()))) ||
+                      (match(Op0, m_And(m_Value(), m_Value())) &&
+                       match(Op1, m_LogicalShift(m_Value(), m_Value())));
+
+  if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh)
     return nullptr;
 
   SmallVector<Instruction*, 4> Insts;
@@ -1448,7 +1712,6 @@ static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
       return false;
 
     // One element must be all ones, and the other must be all zeros.
-    // FIXME: Allow undef elements.
     if (!((match(EltC1, m_Zero()) && match(EltC2, m_AllOnes())) ||
           (match(EltC2, m_Zero()) && match(EltC1, m_AllOnes()))))
       return false;
@@ -1755,14 +2018,15 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
 Instruction *InstCombiner::visitOr(BinaryOperator &I) {
-  bool Changed = SimplifyAssociativeOrCommutative(I);
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  if (Value *V = SimplifyVectorOp(I))
+  if (Value *V = SimplifyOrInst(I.getOperand(0), I.getOperand(1),
+                                SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyOrInst(Op0, Op1, SQ.getWithInstruction(&I)))
-    return replaceInstUsesWith(I, V);
+  if (SimplifyAssociativeOrCommutative(I))
+    return &I;
+
+  if (Instruction *X = foldShuffledBinop(I))
+    return X;
 
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
@@ -1780,14 +2044,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (Value *V = SimplifyBSwap(I, Builder))
     return replaceInstUsesWith(I, V);
 
-  if (isa<Constant>(Op1))
-    if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
-      return FoldedLogic;
+  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
+    return FoldedLogic;
 
   // Given an OR instruction, check to see if this is a bswap.
   if (Instruction *BSwap = MatchBSwap(I))
     return BSwap;
 
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   {
     Value *A;
     const APInt *C;
@@ -2027,7 +2291,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     }
   }
 
-  return Changed ? &I : nullptr;
+  return nullptr;
 }
 
 /// A ^ B can be specified using other logic ops in a variety of patterns. We
@@ -2045,10 +2309,8 @@
   // (A & B) ^ (B | A) -> A ^ B
   // (A | B) ^ (A & B) -> A ^ B
   // (A | B) ^ (B & A) -> A ^ B
-  if ((match(Op0, m_And(m_Value(A), m_Value(B))) &&
-       match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) ||
-      (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
-       match(Op1, m_c_And(m_Specific(A), m_Specific(B))))) {
+  if (match(&I, m_c_Xor(m_And(m_Value(A), m_Value(B)),
+                        m_c_Or(m_Deferred(A), m_Deferred(B))))) {
     I.setOperand(0, A);
     I.setOperand(1, B);
     return &I;
@@ -2058,10 +2320,8 @@
   // (~B | A) ^ (~A | B) -> A ^ B
   // (~A | B) ^ (A | ~B) -> A ^ B
   // (B | ~A) ^ (A | ~B) -> A ^ B
-  if ((match(Op0, m_Or(m_Value(A), m_Not(m_Value(B)))) &&
-       match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) ||
-      (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
-       match(Op1, m_c_Or(m_Specific(A), m_Not(m_Specific(B)))))) {
+  if (match(&I, m_Xor(m_c_Or(m_Value(A), m_Not(m_Value(B))),
+                      m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B))))) {
     I.setOperand(0, A);
     I.setOperand(1, B);
     return &I;
@@ -2071,10 +2331,8 @@
   // (~B & A) ^ (~A & B) -> A ^ B
   // (~A & B) ^ (A & ~B) -> A ^ B
   // (B & ~A) ^ (A & ~B) -> A ^ B
-  if ((match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
-       match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))) ||
-      (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
-       match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))))) {
+  if (match(&I, m_Xor(m_c_And(m_Value(A), m_Not(m_Value(B))),
+                      m_c_And(m_Not(m_Deferred(A)), m_Deferred(B))))) {
     I.setOperand(0, A);
     I.setOperand(1, B);
     return &I;
@@ -2113,6 +2371,34 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
     }
   }
 
+  // TODO: This can be generalized to compares of non-signbits using
+  // decomposeBitTestICmp(). It could be enhanced more by using (something like)
+  // foldLogOpOfMaskedICmps().
+  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
+  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
+  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
+  if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
+      LHS0->getType() == RHS0->getType()) {
+    // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
+    // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
+    if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
+         PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) ||
+        (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
+         PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero()))) {
+      Value *Zero = ConstantInt::getNullValue(LHS0->getType());
+      return Builder.CreateICmpSLT(Builder.CreateXor(LHS0, RHS0), Zero);
+    }
+    // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
+    // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
+    if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
+         PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) ||
+        (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
+         PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes()))) {
+      Value *MinusOne = ConstantInt::getAllOnesValue(LHS0->getType());
+      return Builder.CreateICmpSGT(Builder.CreateXor(LHS0, RHS0), MinusOne);
+    }
+  }
+
   // Instead of trying to imitate the folds for and/or, decompose this 'xor'
   // into those logic ops. That is, try to turn this into an and-of-icmps
   // because we have many folds for that pattern.
@@ -2140,18 +2426,63 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   return nullptr;
 }
 
+/// If we have a masked merge, in the canonical form of:
+/// (assuming that A only has one use.)
+///   |        A  |  |B|
+///   ((x ^ y) & M) ^ y
+///    |  D  |
+/// * If M is inverted:
+///      |  D  |
+///   ((x ^ y) & ~M) ^ y
+///   We can canonicalize by swapping the final xor operand
+///   to eliminate the 'not' of the mask.
+///   ((x ^ y) & M) ^ x
+/// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops
+///   because that shortens the dependency chain and improves analysis:
+///   (x & M) | (y & ~M)
+static Instruction *visitMaskedMerge(BinaryOperator &I,
+                                     InstCombiner::BuilderTy &Builder) {
+  Value *B, *X, *D;
+  Value *M;
+  if (!match(&I, m_c_Xor(m_Value(B),
+                         m_OneUse(m_c_And(
+                             m_CombineAnd(m_c_Xor(m_Deferred(B), m_Value(X)),
+                                          m_Value(D)),
+                             m_Value(M))))))
+    return nullptr;
+
+  Value *NotM;
+  if (match(M, m_Not(m_Value(NotM)))) {
+    // De-invert the mask and swap the value in B part.
+    Value *NewA = Builder.CreateAnd(D, NotM);
+    return BinaryOperator::CreateXor(NewA, X);
+  }
+
+  Constant *C;
+  if (D->hasOneUse() && match(M, m_Constant(C))) {
+    // Unfold.
+    Value *LHS = Builder.CreateAnd(X, C);
+    Value *NotC = Builder.CreateNot(C);
+    Value *RHS = Builder.CreateAnd(B, NotC);
+    return BinaryOperator::CreateOr(LHS, RHS);
+  }
+
+  return nullptr;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
 Instruction *InstCombiner::visitXor(BinaryOperator &I) {
-  bool Changed = SimplifyAssociativeOrCommutative(I);
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  if (Value *V = SimplifyVectorOp(I))
+  if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
+                                 SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I)))
-    return replaceInstUsesWith(I, V);
+  if (SimplifyAssociativeOrCommutative(I))
+    return &I;
+
+  if (Instruction *X = foldShuffledBinop(I))
+    return X;
 
   if (Instruction *NewXor = foldXorToXor(I, Builder))
     return NewXor;
@@ -2168,6 +2499,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   if (Value *V = SimplifyBSwap(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  // A^B --> A|B iff A and B have no bits set in common.
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+  if (haveNoCommonBitsSet(Op0, Op1, DL, &AC, &I, &DT))
+    return BinaryOperator::CreateOr(Op0, Op1);
+
   // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
   Value *X, *Y;
 
@@ -2186,6 +2522,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
     return BinaryOperator::CreateAnd(X, NotY);
   }
 
+  if (Instruction *Xor = visitMaskedMerge(I, Builder))
+    return Xor;
+
   // Is this a 'not' (~) fed by a binary operator?
   BinaryOperator *NotVal;
   if (match(&I, m_Not(m_BinOp(NotVal)))) {
@@ -2206,6 +2545,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       }
     }
 
+    // ~(X - Y) --> ~X + Y
+    if (match(NotVal, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))))
+      return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
+
     // ~(~X >>s Y) --> (X >>s Y)
     if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
      return BinaryOperator::CreateAShr(X, Y);
@@ -2214,16 +2557,18 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
     // the 'not' by inverting the constant and using the opposite shift type.
     // Canonicalization rules ensure that only a negative constant uses 'ashr',
     // but we must check that in case that transform has not fired yet.
-    const APInt *C;
-    if (match(NotVal, m_AShr(m_APInt(C), m_Value(Y))) && C->isNegative()) {
+    Constant *C;
+    if (match(NotVal, m_AShr(m_Constant(C), m_Value(Y))) &&
+        match(C, m_Negative())) {
       // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
-      Constant *NotC = ConstantInt::get(I.getType(), ~(*C));
+      Constant *NotC = ConstantExpr::getNot(C);
       return BinaryOperator::CreateLShr(NotC, Y);
     }
 
-    if (match(NotVal, m_LShr(m_APInt(C), m_Value(Y))) && C->isNonNegative()) {
+    if (match(NotVal, m_LShr(m_Constant(C), m_Value(Y))) &&
+        match(C, m_NonNegative())) {
       // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
-      Constant *NotC = ConstantInt::get(I.getType(), ~(*C));
+      Constant *NotC = ConstantExpr::getNot(C);
       return BinaryOperator::CreateAShr(NotC, Y);
     }
   }
@@ -2305,9 +2650,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
     }
   }
 
-  if (isa<Constant>(Op1))
-    if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
-      return FoldedLogic;
+  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
+    return FoldedLogic;
 
   {
     Value *A, *B;
@@ -2397,25 +2741,59 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
     return CastedXor;
 
-  // Canonicalize the shifty way to code absolute value to the common pattern.
+  // Canonicalize a shifty way to code absolute value to the common pattern.
   // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
   // We're relying on the fact that we only do this transform when the shift has
   // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
   // instructions).
-  if (Op0->getNumUses() == 2)
+  if (Op0->hasNUses(2))
     std::swap(Op0, Op1);
 
   const APInt *ShAmt;
   Type *Ty = I.getType();
   if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
-      Op1->getNumUses() == 2 && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
+      Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
      match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
     // B = ashr i32 A, 31 ; smear the sign bit
     // xor (add A, B), B ; add -1 and flip bits if negative
     // --> (A < 0) ? -A : A
     Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
-    return SelectInst::Create(Cmp, Builder.CreateNeg(A), A);
+    // Copy the nuw/nsw flags from the add to the negate.
+    auto *Add = cast<BinaryOperator>(Op0);
+    Value *Neg = Builder.CreateNeg(A, "", Add->hasNoUnsignedWrap(),
+                                   Add->hasNoSignedWrap());
+    return SelectInst::Create(Cmp, Neg, A);
+  }
+
+  // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
+  //
+  //   %notx = xor i32 %x, -1
+  //   %cmp1 = icmp sgt i32 %notx, %y
+  //   %smax = select i1 %cmp1, i32 %notx, i32 %y
+  //   %res = xor i32 %smax, -1
+  // =>
+  //   %noty = xor i32 %y, -1
+  //   %cmp2 = icmp slt %x, %noty
+  //   %res = select i1 %cmp2, i32 %x, i32 %noty
+  //
+  // Same is applicable for smin/umax/umin.
+  {
+    Value *LHS, *RHS;
+    SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor;
+    if (Op0->hasOneUse() && SelectPatternResult::isMinOrMax(SPF) &&
+        match(Op1, m_AllOnes())) {
+
+      Value *X;
+      if (match(RHS, m_Not(m_Value(X))))
+        std::swap(RHS, LHS);
+
+      if (match(LHS, m_Not(m_Value(X)))) {
+        Value *NotY = Builder.CreateNot(RHS);
+        return SelectInst::Create(
+            Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
+      }
+    }
   }
 
-  return Changed ? &I : nullptr;
+  return nullptr;
 }
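
The transforms above are easiest to follow on concrete IR. To make the new masked-compare fold concrete, here is a hand-written LLVM IR sketch of the Mask_NotAllZeros / BMask_Mixed case (the function name and the expected output are illustrations by the editor, not tests from this patch). With B = 12, D = 7, E = 1, the single-bit condition from the comments holds: B & (B ^ D) = 8 is a power of two and (B & D) & E = 0, so the pair collapses into one compare:

define i1 @masked_pair(i32 %a) {
  %t1 = and i32 %a, 12
  %c1 = icmp ne i32 %t1, 0   ; Mask_NotAllZeros: some bit of 12 is set
  %t2 = and i32 %a, 7
  %c2 = icmp eq i32 %t2, 1   ; BMask_Mixed: the low three bits equal 001
  %r = and i1 %c1, %c2
  ret i1 %r
}

; Bit 3 is forced to one, so the expected combined form is:
;   %t = and i32 %a, 15
;   %r = icmp eq i32 %t, 9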
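
The new narrowMaskedBinOp transform can be sketched the same way; this example assumes the zext has exactly the two uses shown and that narrowing to i8 passes shouldChangeType:

define i32 @narrow(i8 %x) {
  %z = zext i8 %x to i32
  %b = add i32 %z, 5    ; binop of the zext with a constant
  %r = and i32 %b, %z   ; re-mask with the same zext
  ret i32 %r
}

; Expected: the arithmetic sinks to i8 and the zext moves last:
;   %b = add i8 %x, 5
;   %a = and i8 %b, %x
;   %r = zext i8 %a to i32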
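
The sign-bit fold added to foldXorOfICmps turns two sign tests into one test of the xor, since the sign bit of x ^ y is set exactly when the signs of x and y differ (editor's sketch):

define i1 @signs_differ(i32 %x, i32 %y) {
  %a = icmp sgt i32 %x, -1   ; sign(x) is clear
  %b = icmp sgt i32 %y, -1   ; sign(y) is clear
  %r = xor i1 %a, %b         ; true iff the sign bits differ
  ret i1 %r
}

; Expected:
;   %t = xor i32 %x, %y
;   %r = icmp slt i32 %t, 0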
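
visitMaskedMerge's constant-mask unfolding, sketched under the one-use assumptions the matcher checks; the merge ((x ^ y) & M) ^ y picks bits of x where M is one and bits of y where M is zero, which is exactly (x & M) | (y & ~M):

define i32 @merge(i32 %x, i32 %y) {
  %d = xor i32 %x, %y
  %m = and i32 %d, 65535   ; constant mask M = 0xFFFF
  %r = xor i32 %m, %y
  ret i32 %r
}

; Expected unfolded form, which breaks the xor dependency chain:
;   %lo = and i32 %x, 65535
;   %hi = and i32 %y, -65536   ; y & ~M
;   %r  = or i32 %lo, %hi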
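
The small ~(X - Y) --> ~X + Y fold follows from ~a = -a - 1, so ~(x - y) = y - x - 1 = ~x + y; sketch:

define i32 @not_sub(i32 %x, i32 %y) {
  %s = sub i32 %x, %y
  %r = xor i32 %s, -1   ; ~(x - y)
  ret i32 %r
}

; Expected:
;   %nx = xor i32 %x, -1
;   %r  = add i32 %nx, %y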
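
Finally, the absolute-value idiom that the end of visitXor canonicalizes, with the use counts the comment insists on (the ashr has exactly two uses, the add exactly one); the select form is the editor's reading of the transform's output:

define i32 @abs(i32 %a) {
  %s = ashr i32 %a, 31   ; 0 for non-negative a, -1 for negative a
  %t = add i32 %a, %s    ; subtract one if negative
  %r = xor i32 %t, %s    ; flip the bits if negative
  ret i32 %r
}

; Expected canonical form:
;   %cmp = icmp slt i32 %a, 0
;   %neg = sub i32 0, %a
;   %r   = select i1 %cmp, i32 %neg, i32 %a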
