Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineAndOrXor.cpp')
-rw-r--r--   lib/Transforms/InstCombine/InstCombineAndOrXor.cpp   614
1 file changed, 213 insertions, 401 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index fdc9c373b95e..2364202e5b69 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
 #include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -120,35 +120,9 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
                                     ConstantInt *AndRHS,
                                     BinaryOperator &TheAnd) {
   Value *X = Op->getOperand(0);
-  Constant *Together = nullptr;
-  if (!Op->isShift())
-    Together = ConstantExpr::getAnd(AndRHS, OpRHS);
 
   switch (Op->getOpcode()) {
   default: break;
-  case Instruction::Xor:
-    if (Op->hasOneUse()) {
-      // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
-      Value *And = Builder.CreateAnd(X, AndRHS);
-      And->takeName(Op);
-      return BinaryOperator::CreateXor(And, Together);
-    }
-    break;
-  case Instruction::Or:
-    if (Op->hasOneUse()){
-      ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
-      if (TogetherCI && !TogetherCI->isZero()){
-        // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
-        // NOTE: This reduces the number of bits set in the & mask, which
-        // can expose opportunities for store narrowing.
-        Together = ConstantExpr::getXor(AndRHS, Together);
-        Value *And = Builder.CreateAnd(X, Together);
-        And->takeName(Op);
-        return BinaryOperator::CreateOr(And, OpRHS);
-      }
-    }
-
-    break;
   case Instruction::Add:
     if (Op->hasOneUse()) {
       // Adding a one to a single bit bit-field should be turned into an XOR
@@ -182,64 +156,6 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
       }
     }
     break;
-
-  case Instruction::Shl: {
-    // We know that the AND will not produce any of the bits shifted in, so if
-    // the anded constant includes them, clear them now!
-    //
-    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
-    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
-    APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
-    ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShlMask);
-
-    if (CI->getValue() == ShlMask)
-      // Masking out bits that the shift already masks.
-      return replaceInstUsesWith(TheAnd, Op);   // No need for the and.
-
-    if (CI != AndRHS) {                         // Reducing bits set in and.
-      TheAnd.setOperand(1, CI);
-      return &TheAnd;
-    }
-    break;
-  }
-  case Instruction::LShr: {
-    // We know that the AND will not produce any of the bits shifted in, so if
-    // the anded constant includes them, clear them now!  This only applies to
-    // unsigned shifts, because a signed shr may bring in set bits!
-    //
-    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
-    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
-    APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-    ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShrMask);
-
-    if (CI->getValue() == ShrMask)
-      // Masking out bits that the shift already masks.
-      return replaceInstUsesWith(TheAnd, Op);
-
-    if (CI != AndRHS) {
-      TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
-      return &TheAnd;
-    }
-    break;
-  }
-  case Instruction::AShr:
-    // Signed shr.
-    // See if this is shifting in some sign extension, then masking it out
-    // with an and.
-    if (Op->hasOneUse()) {
-      uint32_t BitWidth = AndRHS->getType()->getBitWidth();
-      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
-      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-      Constant *C = Builder.getInt(AndRHS->getValue() & ShrMask);
-      if (C == AndRHS) {          // Masking out bits shifted in.
-        // (Val ashr C1) & C2 -> (Val lshr C1) & C2
-        // Make the argument unsigned.
-        Value *ShVal = Op->getOperand(0);
-        ShVal = Builder.CreateLShr(ShVal, OpRHS, Op->getName());
-        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
-      }
-    }
-    break;
   }
   return nullptr;
 }
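The Xor and Or cases deleted from OptAndOp above are not lost: they reappear later in this patch inside visitAnd, rewritten in APInt form. To illustrate the AShr case (a hand-written example, not taken from the commit or its tests): when the 'and' mask keeps only bits that the shift did not smear in, the arithmetic shift can be made unsigned:

    %s = ashr i32 %x, 8
    %r = and i32 %s, 16777215      ; 0x00FFFFFF masks off all smeared sign bits
  =>
    %s = lshr i32 %x, 8
    %r = and i32 %s, 16777215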
@@ -376,6 +292,18 @@ static unsigned conjugateICmpMask(unsigned Mask) {
   return NewMask;
 }
 
+// Adapts the external decomposeBitTestICmp for local use.
+static bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred,
+                                 Value *&X, Value *&Y, Value *&Z) {
+  APInt Mask;
+  if (!llvm::decomposeBitTestICmp(LHS, RHS, Pred, X, Mask))
+    return false;
+
+  Y = ConstantInt::get(X->getType(), Mask);
+  Z = ConstantInt::get(X->getType(), 0);
+  return true;
+}
+
 /// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
 /// Return the set of pattern classes (from MaskedICmpType) that both LHS and
 /// RHS satisfy.
@@ -384,10 +312,9 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
                                          ICmpInst *RHS,
                                          ICmpInst::Predicate &PredL,
                                          ICmpInst::Predicate &PredR) {
-  if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
-    return 0;
-  // vectors are not (yet?) supported
-  if (LHS->getOperand(0)->getType()->isVectorTy())
+  // vectors are not (yet?) supported. Don't support pointers either.
+  if (!LHS->getOperand(0)->getType()->isIntegerTy() ||
+      !RHS->getOperand(0)->getType()->isIntegerTy())
     return 0;
 
   // Here comes the tricky part:
@@ -400,24 +327,18 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
   Value *L2 = LHS->getOperand(1);
   Value *L11, *L12, *L21, *L22;
   // Check whether the icmp can be decomposed into a bit test.
-  if (decomposeBitTestICmp(LHS, PredL, L11, L12, L2)) {
+  if (decomposeBitTestICmp(L1, L2, PredL, L11, L12, L2)) {
     L21 = L22 = L1 = nullptr;
   } else {
     // Look for ANDs in the LHS icmp.
-    if (!L1->getType()->isIntegerTy()) {
-      // You can icmp pointers, for example. They really aren't masks.
-      L11 = L12 = nullptr;
-    } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+    if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
       // Any icmp can be viewed as being trivially masked; if it allows us to
       // remove one, it's worth it.
       L11 = L1;
       L12 = Constant::getAllOnesValue(L1->getType());
     }
 
-    if (!L2->getType()->isIntegerTy()) {
-      // You can icmp pointers, for example. They really aren't masks.
-      L21 = L22 = nullptr;
-    } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
+    if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
       L21 = L2;
       L22 = Constant::getAllOnesValue(L2->getType());
     }
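The new local decomposeBitTestICmp wrapper converts the (predicate, mask) pair produced by the analysis routine into the "(X & Y) pred Z" triple that the masked-compare matcher below expects, with Y the mask and Z zero. An illustrative instance (hand-written, not from the commit): a sign test decomposes into a bit test, so

    %r = icmp slt i32 %x, 0
  is treated as
    %m = and i32 %x, -2147483648   ; sign bit
    %r = icmp ne i32 %m, 0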
@@ -431,7 +352,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
   Value *R2 = RHS->getOperand(1);
   Value *R11, *R12;
   bool Ok = false;
-  if (decomposeBitTestICmp(RHS, PredR, R11, R12, R2)) {
+  if (decomposeBitTestICmp(R1, R2, PredR, R11, R12, R2)) {
     if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
       A = R11;
       D = R12;
@@ -444,7 +365,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
     E = R2;
     R1 = nullptr;
     Ok = true;
-  } else if (R1->getType()->isIntegerTy()) {
+  } else {
     if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
       // As before, model no mask as a trivial mask if it'll let us do an
       // optimization.
@@ -470,7 +391,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
     return 0;
 
   // Look for ANDs on the right side of the RHS icmp.
-  if (!Ok && R2->getType()->isIntegerTy()) {
+  if (!Ok) {
     if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
       R11 = R2;
       R12 = Constant::getAllOnesValue(R2->getType());
@@ -980,17 +901,15 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   return nullptr;
 }
 
-/// Optimize (fcmp)&(fcmp).  NOTE: Unlike the rest of instcombine, this returns
-/// a Value which should already be inserted into the function.
-Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
-  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
-  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
-  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+Value *InstCombiner::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
+                                      bool IsAnd) {
+  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
+  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
+  FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
 
-  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+  if (LHS0 == RHS1 && RHS0 == LHS1) {
     // Swap RHS operands to match LHS.
-    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
-    std::swap(Op1LHS, Op1RHS);
+    PredR = FCmpInst::getSwappedPredicate(PredR);
+    std::swap(RHS0, RHS1);
   }
 
   // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
@@ -1002,31 +921,30 @@ Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   // bool(R & CC0) && bool(R & CC1)
   //  = bool((R & CC0) & (R & CC1))
   //  = bool(R & (CC0 & CC1)) <= by re-association, commutation, and idempotency
-  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS)
-    return getFCmpValue(getFCmpCode(Op0CC) & getFCmpCode(Op1CC), Op0LHS, Op0RHS,
-                        Builder);
+  //
+  // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
+  // bool(R & CC0) || bool(R & CC1)
+  //  = bool((R & CC0) | (R & CC1))
+  //  = bool(R & (CC0 | CC1)) <= by reversed distribution (contribution? ;)
+  if (LHS0 == RHS0 && LHS1 == RHS1) {
+    unsigned FCmpCodeL = getFCmpCode(PredL);
+    unsigned FCmpCodeR = getFCmpCode(PredR);
+    unsigned NewPred = IsAnd ? FCmpCodeL & FCmpCodeR : FCmpCodeL | FCmpCodeR;
+    return getFCmpValue(NewPred, LHS0, LHS1, Builder);
+  }
 
-  if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
-      RHS->getPredicate() == FCmpInst::FCMP_ORD) {
-    if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
+  if ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) ||
+      (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && !IsAnd)) {
+    if (LHS0->getType() != RHS0->getType())
       return nullptr;
 
-    // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
-    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
-      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
-        // If either of the constants are nans, then the whole thing returns
-        // false.
-        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
-          return Builder.getFalse();
-        return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
-      }
-
-    // Handle vector zeros.  This occurs because the canonical form of
-    // "fcmp ord x,x" is "fcmp ord x, 0".
-    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
-        isa<ConstantAggregateZero>(RHS->getOperand(1)))
-      return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
-    return nullptr;
+    // FCmp canonicalization ensures that (fcmp ord/uno X, X) and
+    // (fcmp ord/uno X, C) will be transformed to (fcmp X, 0.0).
+    if (match(LHS1, m_Zero()) && LHS1 == RHS1)
+      // Ignore the constants because they are obviously not NANs:
+      // (fcmp ord x, 0.0) & (fcmp ord y, 0.0)  -> (fcmp ord x, y)
+      // (fcmp uno x, 0.0) | (fcmp uno y, 0.0)  -> (fcmp uno x, y)
+      return Builder.CreateFCmp(PredL, LHS0, RHS0);
   }
 
   return nullptr;
 }
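Merging foldAndOfFCmps and foldOrOfFCmps into one function works because an fcmp predicate is a 4-bit condition code, so 'and' intersects the codes and 'or' unions them. Hand-written illustrations (not from the commit or its tests):

    %a = fcmp olt double %x, %y
    %b = fcmp ogt double %x, %y
    %r = or i1 %a, %b
  =>
    %r = fcmp one double %x, %y    ; olt | ogt == one

and the ord/uno special case against the canonical 0.0 operand:

    %a = fcmp ord double %x, 0.0
    %b = fcmp ord double %y, 0.0
    %r = and i1 %a, %b
  =>
    %r = fcmp ord double %x, %y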
@@ -1069,30 +987,24 @@ bool InstCombiner::shouldOptimizeCast(CastInst *CI) {
   if (isEliminableCastPair(PrecedingCI, CI))
     return false;
 
-  // If this is a vector sext from a compare, then we don't want to break the
-  // idiom where each element of the extended vector is either zero or all ones.
-  if (CI->getOpcode() == Instruction::SExt &&
-      isa<CmpInst>(CastSrc) && CI->getDestTy()->isVectorTy())
-    return false;
-
   return true;
 }
 
 /// Fold {and,or,xor} (cast X), C.
 static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
                                           InstCombiner::BuilderTy &Builder) {
-  Constant *C;
-  if (!match(Logic.getOperand(1), m_Constant(C)))
+  Constant *C = dyn_cast<Constant>(Logic.getOperand(1));
+  if (!C)
     return nullptr;
 
   auto LogicOpc = Logic.getOpcode();
   Type *DestTy = Logic.getType();
   Type *SrcTy = Cast->getSrcTy();
 
-  // Move the logic operation ahead of a zext if the constant is unchanged in
-  // the smaller source type. Performing the logic in a smaller type may provide
-  // more information to later folds, and the smaller logic instruction may be
-  // cheaper (particularly in the case of vectors).
+  // Move the logic operation ahead of a zext or sext if the constant is
+  // unchanged in the smaller source type. Performing the logic in a smaller
+  // type may provide more information to later folds, and the smaller logic
+  // instruction may be cheaper (particularly in the case of vectors).
   Value *X;
   if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) {
     Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
@@ -1104,6 +1016,16 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
     }
   }
 
+  if (match(Cast, m_OneUse(m_SExt(m_Value(X))))) {
+    Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
+    Constant *SextTruncC = ConstantExpr::getSExt(TruncC, DestTy);
+    if (SextTruncC == C) {
+      // LogicOpc (sext X), C --> sext (LogicOpc X, C)
+      Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC);
+      return new SExtInst(NewOp, DestTy);
+    }
+  }
+
   return nullptr;
 }
 
@@ -1167,38 +1089,9 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
   // cast is otherwise not optimizable.  This happens for vector sexts.
   FCmpInst *FCmp0 = dyn_cast<FCmpInst>(Cast0Src);
   FCmpInst *FCmp1 = dyn_cast<FCmpInst>(Cast1Src);
-  if (FCmp0 && FCmp1) {
-    Value *Res = LogicOpc == Instruction::And ? foldAndOfFCmps(FCmp0, FCmp1)
-                                              : foldOrOfFCmps(FCmp0, FCmp1);
-    if (Res)
-      return CastInst::Create(CastOpcode, Res, DestTy);
-    return nullptr;
-  }
-
-  return nullptr;
-}
-
-static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) {
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  // Canonicalize SExt or Not to the LHS
-  if (match(Op1, m_SExt(m_Value())) || match(Op1, m_Not(m_Value()))) {
-    std::swap(Op0, Op1);
-  }
-
-  // Fold (and (sext bool to A), B) --> (select bool, B, 0)
-  Value *X = nullptr;
-  if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
-    Value *Zero = Constant::getNullValue(Op1->getType());
-    return SelectInst::Create(X, Op1, Zero);
-  }
-
-  // Fold (and ~(sext bool to A), B) --> (select bool, 0, B)
-  if (match(Op0, m_Not(m_SExt(m_Value(X)))) &&
-      X->getType()->isIntOrIntVectorTy(1)) {
-    Value *Zero = Constant::getNullValue(Op0->getType());
-    return SelectInst::Create(X, Zero, Op1);
-  }
+  if (FCmp0 && FCmp1)
+    if (Value *R = foldLogicOfFCmps(FCmp0, FCmp1, LogicOpc == Instruction::And))
+      return CastInst::Create(CastOpcode, R, DestTy);
 
   return nullptr;
 }
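The new sext counterpart fires when the constant survives a truncate/sign-extend round trip. A hand-written illustration (assumed, not from the commit):

    %e = sext i8 %x to i32
    %r = and i32 %e, -2            ; -2 == sext(trunc(-2) to i8), so shrink the op
  =>
    %n = and i8 %x, -2
    %r = sext i8 %n to i32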
@@ -1284,14 +1177,61 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   if (Value *V = SimplifyBSwap(I, Builder))
     return replaceInstUsesWith(I, V);
 
-  if (match(Op1, m_One())) {
-    // (1 << x) & 1 --> zext(x == 0)
-    // (1 >> x) & 1 --> zext(x == 0)
-    Value *X;
-    if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X))))) {
+  const APInt *C;
+  if (match(Op1, m_APInt(C))) {
+    Value *X, *Y;
+    if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X)))) &&
+        C->isOneValue()) {
+      // (1 << X) & 1 --> zext(X == 0)
+      // (1 >> X) & 1 --> zext(X == 0)
       Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(I.getType(), 0));
       return new ZExtInst(IsZero, I.getType());
     }
+
+    const APInt *XorC;
+    if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_APInt(XorC))))) {
+      // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+      Constant *NewC = ConstantInt::get(I.getType(), *C & *XorC);
+      Value *And = Builder.CreateAnd(X, Op1);
+      And->takeName(Op0);
+      return BinaryOperator::CreateXor(And, NewC);
+    }
+
+    const APInt *OrC;
+    if (match(Op0, m_OneUse(m_Or(m_Value(X), m_APInt(OrC))))) {
+      // (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2)
+      // NOTE: This reduces the number of bits set in the & mask, which
+      // can expose opportunities for store narrowing for scalars.
+      // NOTE: SimplifyDemandedBits should have already removed bits from C1
+      // that aren't set in C2. Meaning we can replace (C1&C2) with C1 in
+      // the above, but this feels safer.
+      APInt Together = *C & *OrC;
+      Value *And = Builder.CreateAnd(X, ConstantInt::get(I.getType(),
+                                                         Together ^ *C));
+      And->takeName(Op0);
+      return BinaryOperator::CreateOr(And, ConstantInt::get(I.getType(),
+                                                            Together));
+    }
+
+    // If the mask is only needed on one incoming arm, push the 'and' op up.
+    if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) ||
+        match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
+      APInt NotAndMask(~(*C));
+      BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Op0)->getOpcode();
+      if (MaskedValueIsZero(X, NotAndMask, 0, &I)) {
+        // Not masking anything out for the LHS, move mask to RHS.
+        // and ({x}or X, Y), C --> {x}or X, (and Y, C)
+        Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked");
+        return BinaryOperator::Create(BinOp, X, NewRHS);
+      }
+      if (!isa<Constant>(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) {
+        // Not masking anything out for the RHS, move mask to LHS.
+        // and ({x}or X, Y), C --> {x}or (and X, C), Y
+        Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked");
+        return BinaryOperator::Create(BinOp, NewLHS, Y);
+      }
+    }
+  }
 
   if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
@@ -1299,34 +1239,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
 
     // Optimize a variety of ((val OP C1) & C2) combinations...
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
-      Value *Op0LHS = Op0I->getOperand(0);
-      Value *Op0RHS = Op0I->getOperand(1);
-      switch (Op0I->getOpcode()) {
-      default: break;
-      case Instruction::Xor:
-      case Instruction::Or: {
-        // If the mask is only needed on one incoming arm, push it up.
-        if (!Op0I->hasOneUse()) break;
-
-        APInt NotAndRHS(~AndRHSMask);
-        if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) {
-          // Not masking anything out for the LHS, move to RHS.
-          Value *NewRHS = Builder.CreateAnd(Op0RHS, AndRHS,
-                                            Op0RHS->getName()+".masked");
-          return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
-        }
-        if (!isa<Constant>(Op0RHS) &&
-            MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) {
-          // Not masking anything out for the RHS, move to LHS.
-          Value *NewLHS = Builder.CreateAnd(Op0LHS, AndRHS,
-                                            Op0LHS->getName()+".masked");
-          return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
-        }
-
-        break;
-      }
-      }
-
       // ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
       // of X and OP behaves well when given trunc(C1) and X.
       switch (Op0I->getOpcode()) {
@@ -1343,6 +1255,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         if (AndRHSMask.isIntN(X->getType()->getScalarSizeInBits())) {
           auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
           Value *BinOp;
+          Value *Op0LHS = Op0I->getOperand(0);
           if (isa<ZExtInst>(Op0LHS))
             BinOp = Builder.CreateBinOp(Op0I->getOpcode(), X, TruncC1);
           else
@@ -1467,17 +1380,22 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     }
   }
 
-  // If and'ing two fcmp, try combine them into one.
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Value *Res = foldAndOfFCmps(LHS, RHS))
+      if (Value *Res = foldLogicOfFCmps(LHS, RHS, true))
         return replaceInstUsesWith(I, Res);
 
   if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
     return CastedAnd;
 
-  if (Instruction *Select = foldBoolSextMaskToSelect(I))
-    return Select;
+  // and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>.
+  Value *A;
+  if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
+      A->getType()->isIntOrIntVectorTy(1))
+    return SelectInst::Create(A, Op1, Constant::getNullValue(I.getType()));
+  if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
+      A->getType()->isIntOrIntVectorTy(1))
+    return SelectInst::Create(A, Op0, Constant::getNullValue(I.getType()));
 
   return Changed ? &I : nullptr;
 }
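The APInt rewrite above generalizes the old ConstantInt-only folds from OptAndOp, and the inline sext-to-select matching replaces the deleted foldBoolSextMaskToSelect helper. Hand-written illustrations (not from the commit's tests):

    %t = xor i32 %x, 12
    %r = and i32 %t, 10
  =>                             ; (X ^ 12) & 10 --> (X & 10) ^ (12 & 10)
    %a = and i32 %x, 10
    %r = xor i32 %a, 8

    %e = sext i1 %b to i32
    %r = and i32 %e, %y
  =>                             ; and(sext(A), B) --> A ? B : 0
    %r = select i1 %b, i32 %y, i32 0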
@@ -1567,8 +1485,9 @@ static Value *getSelectCondition(Value *A, Value *B,
   // If both operands are constants, see if the constants are inverse bitmasks.
   Constant *AC, *BC;
   if (match(A, m_Constant(AC)) && match(B, m_Constant(BC)) &&
-      areInverseVectorBitmasks(AC, BC))
-    return ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty));
+      areInverseVectorBitmasks(AC, BC)) {
+    return Builder.CreateZExtOrTrunc(AC, CmpInst::makeCmpResultType(Ty));
+  }
 
   // If both operands are xor'd with constants using the same sexted boolean
   // operand, see if the constants are inverse bitmasks.
@@ -1832,120 +1751,6 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   return nullptr;
 }
 
-/// Optimize (fcmp)|(fcmp).  NOTE: Unlike the rest of instcombine, this returns
-/// a Value which should already be inserted into the function.
-Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
-  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
-  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
-  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
-
-  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
-    // Swap RHS operands to match LHS.
-    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
-    std::swap(Op1LHS, Op1RHS);
-  }
-
-  // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
-  // This is a similar transformation to the one in FoldAndOfFCmps.
-  //
-  // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
-  //    bool(R & CC0) || bool(R & CC1)
-  //  = bool((R & CC0) | (R & CC1))
-  //  = bool(R & (CC0 | CC1)) <= by reversed distribution (contribution? ;)
-  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS)
-    return getFCmpValue(getFCmpCode(Op0CC) | getFCmpCode(Op1CC), Op0LHS, Op0RHS,
-                        Builder);
-
-  if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
-      RHS->getPredicate() == FCmpInst::FCMP_UNO &&
-      LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
-    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
-      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
-        // If either of the constants are nans, then the whole thing returns
-        // true.
-        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
-          return Builder.getTrue();
-
-        // Otherwise, no need to compare the two constants, compare the
-        // rest.
-        return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
-      }
-
-    // Handle vector zeros.  This occurs because the canonical form of
-    // "fcmp uno x,x" is "fcmp uno x, 0".
-    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
-        isa<ConstantAggregateZero>(RHS->getOperand(1)))
-      return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
-
-    return nullptr;
-  }
-
-  return nullptr;
-}
-
-/// This helper function folds:
-///
-///     ((A | B) & C1) | (B & C2)
-///
-/// into:
-///
-///     (A & C1) | B
-///
-/// when the XOR of the two constants is "all ones" (-1).
-static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
-                                        Value *A, Value *B, Value *C,
-                                        InstCombiner::BuilderTy &Builder) {
-  ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
-  if (!CI1) return nullptr;
-
-  Value *V1 = nullptr;
-  ConstantInt *CI2 = nullptr;
-  if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return nullptr;
-
-  APInt Xor = CI1->getValue() ^ CI2->getValue();
-  if (!Xor.isAllOnesValue()) return nullptr;
-
-  if (V1 == A || V1 == B) {
-    Value *NewOp = Builder.CreateAnd((V1 == A) ? B : A, CI1);
-    return BinaryOperator::CreateOr(NewOp, V1);
-  }
-
-  return nullptr;
-}
-
-/// \brief This helper function folds:
-///
-///     ((A ^ B) & C1) | (B & C2)
-///
-/// into:
-///
-///     (A & C1) ^ B
-///
-/// when the XOR of the two constants is "all ones" (-1).
-static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op,
-                                         Value *A, Value *B, Value *C,
-                                         InstCombiner::BuilderTy &Builder) {
-  ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
-  if (!CI1)
-    return nullptr;
-
-  Value *V1 = nullptr;
-  ConstantInt *CI2 = nullptr;
-  if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2))))
-    return nullptr;
-
-  APInt Xor = CI1->getValue() ^ CI2->getValue();
-  if (!Xor.isAllOnesValue())
-    return nullptr;
-
-  if (V1 == A || V1 == B) {
-    Value *NewOp = Builder.CreateAnd(V1 == A ? B : A, CI1);
-    return BinaryOperator::CreateXor(NewOp, V1);
-  }
-
-  return nullptr;
-}
-
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
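The big deletion above removes the or-half of the fcmp logic (now served by foldLogicOfFCmps) and the two narrow constant helpers, which are re-added in generalized form in visitOr below. The getSelectCondition change to CreateZExtOrTrunc lets the inverse-bitmask fold handle condition types wider than the constants; a sketch of the surrounding fold (hand-written, assumed):

    %l = and <2 x i32> %x, <i32 -1, i32 0>
    %r = and <2 x i32> %y, <i32 0, i32 -1>
    %o = or <2 x i32> %l, %r
  =>
    %o = select <2 x i1> <i1 true, i1 false>, <2 x i32> %x, <2 x i32> %y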
@@ -2011,10 +1816,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   Value *C = nullptr, *D = nullptr;
   if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
       match(Op1, m_And(m_Value(B), m_Value(D)))) {
-    Value *V1 = nullptr, *V2 = nullptr;
     ConstantInt *C1 = dyn_cast<ConstantInt>(C);
     ConstantInt *C2 = dyn_cast<ConstantInt>(D);
     if (C1 && C2) {  // (A & C1)|(B & C2)
+      Value *V1 = nullptr, *V2 = nullptr;
       if ((C1->getValue() & C2->getValue()).isNullValue()) {
         // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
         // iff (C1&C2) == 0 and (N&~C1) == 0
@@ -2046,6 +1851,24 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
                                  Builder.getInt(C1->getValue()|C2->getValue()));
         }
       }
+
+      if (C1->getValue() == ~C2->getValue()) {
+        Value *X;
+
+        // ((X|B)&C1)|(B&C2) -> (X&C1) | B iff C1 == ~C2
+        if (match(A, m_c_Or(m_Value(X), m_Specific(B))))
+          return BinaryOperator::CreateOr(Builder.CreateAnd(X, C1), B);
+        // (A&C2)|((X|A)&C1) -> (X&C2) | A iff C1 == ~C2
+        if (match(B, m_c_Or(m_Specific(A), m_Value(X))))
+          return BinaryOperator::CreateOr(Builder.CreateAnd(X, C2), A);
+
+        // ((X^B)&C1)|(B&C2) -> (X&C1) ^ B iff C1 == ~C2
+        if (match(A, m_c_Xor(m_Value(X), m_Specific(B))))
+          return BinaryOperator::CreateXor(Builder.CreateAnd(X, C1), B);
+        // (A&C2)|((X^A)&C1) -> (X&C2) ^ A iff C1 == ~C2
+        if (match(B, m_c_Xor(m_Specific(A), m_Value(X))))
+          return BinaryOperator::CreateXor(Builder.CreateAnd(X, C2), A);
+      }
     }
 
     // Don't try to form a select if it's unlikely that we'll get rid of at
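The re-added block generalizes the FoldOrWithConstants/FoldXorWithConstants helpers removed above, from the fixed masks 1 and -2 to any pair of inverse constants. A hand-written instance (not from the commit's tests):

    %ab = or i32 %a, %b
    %t1 = and i32 %ab, 1
    %t2 = and i32 %b, -2
    %r  = or i32 %t1, %t2
  =>                             ; ((A|B) & 1) | (B & -2) --> (A & 1) | B
    %t = and i32 %a, 1
    %r = or i32 %t, %b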
@@ -2070,27 +1893,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder))
         return replaceInstUsesWith(I, V);
     }
-
-    // ((A|B)&1)|(B&-2) -> (A&1) | B
-    if (match(A, m_c_Or(m_Value(V1), m_Specific(B)))) {
-      if (Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C, Builder))
-        return Ret;
-    }
-    // (B&-2)|((A|B)&1) -> (A&1) | B
-    if (match(B, m_c_Or(m_Specific(A), m_Value(V1)))) {
-      if (Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D, Builder))
-        return Ret;
-    }
-    // ((A^B)&1)|(B&-2) -> (A&1) ^ B
-    if (match(A, m_c_Xor(m_Value(V1), m_Specific(B)))) {
-      if (Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C, Builder))
-        return Ret;
-    }
-    // (B&-2)|((A^B)&1) -> (A&1) ^ B
-    if (match(B, m_c_Xor(m_Specific(A), m_Value(V1)))) {
-      if (Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D, Builder))
-        return Ret;
-    }
   }
 
   // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
@@ -2182,10 +1984,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     }
   }
 
-  // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Value *Res = foldOrOfFCmps(LHS, RHS))
+      if (Value *Res = foldLogicOfFCmps(LHS, RHS, false))
         return replaceInstUsesWith(I, Res);
 
   if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
     return CastedOr;
@@ -2434,60 +2235,51 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       return replaceInstUsesWith(I, Op0);
   }
 
-  if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) {
-    // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
-    if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
-      if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
-        if (CI->hasOneUse() && Op0C->hasOneUse()) {
-          Instruction::CastOps Opcode = Op0C->getOpcode();
-          if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
-              (RHSC == ConstantExpr::getCast(Opcode, Builder.getTrue(),
-                                             Op0C->getDestTy()))) {
-            CI->setPredicate(CI->getInversePredicate());
-            return CastInst::Create(Opcode, CI, Op0C->getType());
-          }
+  {
+    const APInt *RHSC;
+    if (match(Op1, m_APInt(RHSC))) {
+      Value *X;
+      const APInt *C;
+      if (match(Op0, m_Sub(m_APInt(C), m_Value(X)))) {
+        // ~(c-X) == X-c-1 == X+(-c-1)
+        if (RHSC->isAllOnesValue()) {
+          Constant *NewC = ConstantInt::get(I.getType(), -(*C) - 1);
+          return BinaryOperator::CreateAdd(X, NewC);
+        }
+        if (RHSC->isSignMask()) {
+          // (C - X) ^ signmask -> (C + signmask - X)
+          Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
+          return BinaryOperator::CreateSub(NewC, X);
+        }
+      } else if (match(Op0, m_Add(m_Value(X), m_APInt(C)))) {
+        // ~(X-c) --> (-c-1)-X
+        if (RHSC->isAllOnesValue()) {
+          Constant *NewC = ConstantInt::get(I.getType(), -(*C) - 1);
+          return BinaryOperator::CreateSub(NewC, X);
+        }
+        if (RHSC->isSignMask()) {
+          // (X + C) ^ signmask -> (X + C + signmask)
+          Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
+          return BinaryOperator::CreateAdd(X, NewC);
         }
+      }
+
+      // (X|C1)^C2 -> X^(C1^C2) iff X&~C1 == 0
+      if (match(Op0, m_Or(m_Value(X), m_APInt(C))) &&
+          MaskedValueIsZero(X, *C, 0, &I)) {
+        Constant *NewC = ConstantInt::get(I.getType(), *C ^ *RHSC);
+        Worklist.Add(cast<Instruction>(Op0));
+        I.setOperand(0, X);
+        I.setOperand(1, NewC);
+        return &I;
+      }
     }
   }
 
+  if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) {
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
-      // ~(c-X) == X-c-1 == X+(-c-1)
-      if (Op0I->getOpcode() == Instruction::Sub && RHSC->isMinusOne())
-        if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
-          Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
-          return BinaryOperator::CreateAdd(Op0I->getOperand(1),
-                                           SubOne(NegOp0I0C));
-        }
-
-      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
-        if (Op0I->getOpcode() == Instruction::Add) {
-          // ~(X-c) --> (-c-1)-X
-          if (RHSC->isMinusOne()) {
-            Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
-            return BinaryOperator::CreateSub(SubOne(NegOp0CI),
-                                             Op0I->getOperand(0));
-          } else if (RHSC->getValue().isSignMask()) {
-            // (X + C) ^ signmask -> (X + C + signmask)
-            Constant *C = Builder.getInt(RHSC->getValue() + Op0CI->getValue());
-            return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
-          }
-        } else if (Op0I->getOpcode() == Instruction::Or) {
-          // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
-          if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue(),
-                                0, &I)) {
-            Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHSC);
-            // Anything in both C1 and C2 is known to be zero, remove it from
-            // NewRHS.
-            Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHSC);
-            NewRHS = ConstantExpr::getAnd(NewRHS,
-                                          ConstantExpr::getNot(CommonBits));
-            Worklist.Add(Op0I);
-            I.setOperand(0, Op0I->getOperand(0));
-            I.setOperand(1, NewRHS);
-            return &I;
-          }
-        } else if (Op0I->getOpcode() == Instruction::LShr) {
+      if (Op0I->getOpcode() == Instruction::LShr) {
         // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
         // E1 = "X ^ C1"
         BinaryOperator *E1;
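The new m_APInt block hoists these xor-of-constant folds out of the ConstantInt-only path, which notably lets splat-vector constants fold as well. A worked scalar instance (hand-written):

    %s = sub i32 5, %x
    %r = xor i32 %s, -1            ; ~(5 - X)
  =>
    %r = add i32 %x, -6            ; X + (-5 - 1)

With the sign mask, (X + C) ^ signmask likewise becomes X + (C + signmask), since flipping the sign bit is the same as adding it.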
@@ -2605,5 +2397,25 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
     return CastedXor;
 
+  // Canonicalize the shifty way to code absolute value to the common pattern.
+  // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
+  // We're relying on the fact that we only do this transform when the shift has
+  // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
+  // instructions).
+  if (Op0->getNumUses() == 2)
+    std::swap(Op0, Op1);
+
+  const APInt *ShAmt;
+  Type *Ty = I.getType();
+  if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
+      Op1->getNumUses() == 2 && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
+      match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
+    // B = ashr i32 A, 31 ; smear the sign bit
+    // xor (add A, B), B ; add -1 and flip bits if negative
+    // --> (A < 0) ? -A : A
+    Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
+    return SelectInst::Create(Cmp, Builder.CreateNeg(A), A);
+  }
+
   return Changed ? &I : nullptr;
 }
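The final hunk rewrites the branchless absolute-value idiom into the common icmp+select form the comment describes. End to end (hand-written example matching the in-patch comment; the function name is illustrative):

    define i32 @abs(i32 %a) {
      %b = ashr i32 %a, 31         ; 0 or -1: the sign bit, smeared
      %s = add i32 %a, %b
      %r = xor i32 %s, %b          ; subtracts 1 and flips the bits iff %a < 0
      ret i32 %r
    }
  =>
      %cmp = icmp slt i32 %a, 0
      %neg = sub i32 0, %a
      %r   = select i1 %cmp, i32 %neg, i32 %a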
