1 files changed, 658 insertions, 186 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4c8b63d2f2397..48e03c6da68fb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -32,7 +32,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -58,6 +57,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Mutex.h"
@@ -89,11 +89,16 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
 
 #define DEBUG_TYPE "selectiondag"
 
+static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
+       cl::Hidden, cl::init(true),
+       cl::desc("Gang up loads and stores generated by inlining of memcpy"));
+
+static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
+       cl::desc("Number limit for gluing ld/st of memcpy."),
+       cl::Hidden, cl::init(0));
+
 static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
-  DEBUG(
-    dbgs() << Msg;
-    V.getNode()->dump(G);
-  );
+  LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
 }
 
 //===----------------------------------------------------------------------===//
@@ -263,6 +268,52 @@ bool ISD::allOperandsUndef(const SDNode *N) {
   return true;
 }
 
+bool ISD::matchUnaryPredicate(SDValue Op,
+                              std::function<bool(ConstantSDNode *)> Match) {
+  if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
+    return Match(Cst);
+
+  if (ISD::BUILD_VECTOR != Op.getOpcode())
+    return false;
+
+  EVT SVT = Op.getValueType().getScalarType();
+  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+    auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+    if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
+      return false;
+  }
+  return true;
+}
+
+bool ISD::matchBinaryPredicate(
+    SDValue LHS, SDValue RHS,
+    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+  if (LHS.getValueType() != RHS.getValueType())
+    return false;
+
+  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
+    if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
+      return Match(LHSCst, RHSCst);
+
+  if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
+      ISD::BUILD_VECTOR != RHS.getOpcode())
+    return false;
+
+  EVT SVT = LHS.getValueType().getScalarType();
+  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+    auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
+    auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
+    if (!LHSCst || !RHSCst)
+      return false;
+    if (LHSCst->getValueType(0) != SVT ||
+        LHSCst->getValueType(0) != RHSCst->getValueType(0))
+      return false;
+    if (!Match(LHSCst, RHSCst))
+      return false;
+  }
+  return true;
+}
+
 ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
   switch (ExtType) {
   case ISD::EXTLOAD:
@@ -487,12 +538,41 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
     ID.AddInteger(ST->getPointerInfo().getAddrSpace());
     break;
   }
+  case ISD::MLOAD: {
+    const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+    ID.AddInteger(MLD->getMemoryVT().getRawBits());
+    ID.AddInteger(MLD->getRawSubclassData());
+    ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
+    break;
+  }
+  case ISD::MSTORE: {
+    const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+    ID.AddInteger(MST->getMemoryVT().getRawBits());
+    ID.AddInteger(MST->getRawSubclassData());
+    ID.AddInteger(MST->getPointerInfo().getAddrSpace());
+    break;
+  }
+  case ISD::MGATHER: {
+    const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N);
+    ID.AddInteger(MG->getMemoryVT().getRawBits());
+    ID.AddInteger(MG->getRawSubclassData());
+    ID.AddInteger(MG->getPointerInfo().getAddrSpace());
+    break;
+  }
+  case ISD::MSCATTER: {
+    const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N);
+    ID.AddInteger(MS->getMemoryVT().getRawBits());
+    ID.AddInteger(MS->getRawSubclassData());
+    ID.AddInteger(MS->getPointerInfo().getAddrSpace());
+    break;
+  }
   case ISD::ATOMIC_CMP_SWAP:
   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
   case ISD::ATOMIC_SWAP:
   case ISD::ATOMIC_LOAD_ADD:
   case ISD::ATOMIC_LOAD_SUB:
   case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_CLR:
   case ISD::ATOMIC_LOAD_OR:
   case ISD::ATOMIC_LOAD_XOR:
   case ISD::ATOMIC_LOAD_NAND:
@@ -726,7 +806,7 @@ static void VerifySDNode(SDNode *N) {
 }
 #endif // NDEBUG
 
-/// \brief Insert a newly allocated node into the DAG.
+/// Insert a newly allocated node into the DAG.
 ///
 /// Handles insertion into the all nodes list and CSE map, as well as
 /// verification and other common operations when a new node is allocated.
@@ -903,13 +983,16 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
 
 void SelectionDAG::init(MachineFunction &NewMF,
                         OptimizationRemarkEmitter &NewORE,
-                        Pass *PassPtr) {
+                        Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
+                        DivergenceAnalysis * Divergence) {
   MF = &NewMF;
   SDAGISelPass = PassPtr;
   ORE = &NewORE;
   TLI = getSubtarget().getTargetLowering();
   TSI = getSubtarget().getSelectionDAGInfo();
+  LibInfo = LibraryInfo;
   Context = &MF->getFunction().getContext();
+  DA = Divergence;
 }
 
 SelectionDAG::~SelectionDAG() {
@@ -1077,21 +1160,25 @@ SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
 }
 
 SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
-  EVT EltVT = VT.getScalarType();
-  SDValue TrueValue;
-  switch (TLI->getBooleanContents(VT)) {
-    case TargetLowering::ZeroOrOneBooleanContent:
-    case TargetLowering::UndefinedBooleanContent:
-      TrueValue = getConstant(1, DL, VT);
-      break;
-    case TargetLowering::ZeroOrNegativeOneBooleanContent:
-      TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,
-                              VT);
-      break;
-  }
+  SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
   return getNode(ISD::XOR, DL, VT, Val, TrueValue);
 }
 
+SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
+                                      EVT OpVT) {
+  if (!V)
+    return getConstant(0, DL, VT);
+
+  switch (TLI->getBooleanContents(OpVT)) {
+  case TargetLowering::ZeroOrOneBooleanContent:
+  case TargetLowering::UndefinedBooleanContent:
+    return getConstant(1, DL, VT);
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    return getAllOnesConstant(DL, VT);
+  }
+  llvm_unreachable("Unexpected boolean content enum!");
+}
+
 SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
                                   bool isT, bool isO) {
   EVT EltVT = VT.getScalarType();
@@ -1184,7 +1271,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
       return SDValue(N, 0);
 
   if (!N) {
-    N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
+    N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT);
     CSEMap.InsertNode(N, IP);
     InsertNode(N);
     NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
@@ -1227,7 +1314,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
       return SDValue(N, 0);
 
   if (!N) {
-    N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT);
+    N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT);
     CSEMap.InsertNode(N, IP);
     InsertNode(N);
   }
@@ -1503,33 +1590,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
   if (N1.isUndef())
     commuteShuffle(N1, N2, MaskVec);
 
-  // If shuffling a splat, try to blend the splat instead. We do this here so
-  // that even when this arises during lowering we don't have to re-handle it.
-  auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
-    BitVector UndefElements;
-    SDValue Splat = BV->getSplatValue(&UndefElements);
-    if (!Splat)
-      return;
+  if (TLI->hasVectorBlend()) {
+    // If shuffling a splat, try to blend the splat instead. We do this here so
+    // that even when this arises during lowering we don't have to re-handle it.
+    auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+      BitVector UndefElements;
+      SDValue Splat = BV->getSplatValue(&UndefElements);
+      if (!Splat)
+        return;
 
-    for (int i = 0; i < NElts; ++i) {
-      if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
-        continue;
+      for (int i = 0; i < NElts; ++i) {
+        if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
+          continue;
 
-      // If this input comes from undef, mark it as such.
-      if (UndefElements[MaskVec[i] - Offset]) {
-        MaskVec[i] = -1;
-        continue;
-      }
+        // If this input comes from undef, mark it as such.
+        if (UndefElements[MaskVec[i] - Offset]) {
+          MaskVec[i] = -1;
+          continue;
+        }
 
-      // If we can blend a non-undef lane, use that instead.
-      if (!UndefElements[i])
-        MaskVec[i] = i + Offset;
-    }
-  };
-  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
-    BlendSplat(N1BV, 0);
-  if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
-    BlendSplat(N2BV, NElts);
+        // If we can blend a non-undef lane, use that instead.
+        if (!UndefElements[i])
+          MaskVec[i] = i + Offset;
+      }
+    };
+    if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+      BlendSplat(N1BV, 0);
+    if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+      BlendSplat(N2BV, NElts);
+  }
 
   // Canonicalize all index into lhs, -> shuffle lhs, undef
   // Canonicalize all index into rhs, -> shuffle rhs, undef
@@ -1643,7 +1732,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
 }
 
 SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
-  MVT VT = SV.getSimpleValueType(0);
+  EVT VT = SV.getValueType(0);
   SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
   ShuffleVectorSDNode::commuteMask(MaskVec);
 
@@ -1661,6 +1750,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
     return SDValue(E, 0);
 
   auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
+  N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
   return SDValue(N, 0);
@@ -1870,19 +1960,15 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
 
 SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
                                 ISD::CondCode Cond, const SDLoc &dl) {
+  EVT OpVT = N1.getValueType();
+
   // These setcc operations always fold.
   switch (Cond) {
   default: break;
   case ISD::SETFALSE:
-  case ISD::SETFALSE2: return getConstant(0, dl, VT);
+  case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT);
   case ISD::SETTRUE:
-  case ISD::SETTRUE2: {
-    TargetLowering::BooleanContent Cnt =
-        TLI->getBooleanContents(N1->getValueType(0));
-    return getConstant(
-        Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
-        VT);
-  }
+  case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT);
 
   case ISD::SETOEQ:
   case ISD::SETOGT:
@@ -1905,16 +1991,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
 
       switch (Cond) {
       default: llvm_unreachable("Unknown integer setcc!");
-      case ISD::SETEQ:  return getConstant(C1 == C2, dl, VT);
-      case ISD::SETNE:  return getConstant(C1 != C2, dl, VT);
-      case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT);
-      case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT);
-      case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT);
-      case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT);
-      case ISD::SETLT:  return getConstant(C1.slt(C2), dl, VT);
-      case ISD::SETGT:  return getConstant(C1.sgt(C2), dl, VT);
-      case ISD::SETLE:  return getConstant(C1.sle(C2), dl, VT);
-      case ISD::SETGE:  return getConstant(C1.sge(C2), dl, VT);
+      case ISD::SETEQ:  return getBoolConstant(C1 == C2, dl, VT, OpVT);
+      case ISD::SETNE:  return getBoolConstant(C1 != C2, dl, VT, OpVT);
+      case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);
+      case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);
+      case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);
+      case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);
+      case ISD::SETLT:  return getBoolConstant(C1.slt(C2), dl, VT, OpVT);
+      case ISD::SETGT:  return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);
+      case ISD::SETLE:  return getBoolConstant(C1.sle(C2), dl, VT, OpVT);
+      case ISD::SETGE:  return getBoolConstant(C1.sge(C2), dl, VT, OpVT);
       }
     }
   }
@@ -1926,41 +2012,54 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
       case ISD::SETEQ:  if (R==APFloat::cmpUnordered)
                           return getUNDEF(VT);
                         LLVM_FALLTHROUGH;
-      case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
+      case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
+                                               OpVT);
       case ISD::SETNE:  if (R==APFloat::cmpUnordered)
                           return getUNDEF(VT);
                         LLVM_FALLTHROUGH;
-      case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
-                                           R==APFloat::cmpLessThan, dl, VT);
+      case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+                                               R==APFloat::cmpLessThan, dl, VT,
+                                               OpVT);
       case ISD::SETLT:  if (R==APFloat::cmpUnordered)
                           return getUNDEF(VT);
                         LLVM_FALLTHROUGH;
-      case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
+      case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
+                                               OpVT);
       case ISD::SETGT:  if (R==APFloat::cmpUnordered)
                           return getUNDEF(VT);
                         LLVM_FALLTHROUGH;
-      case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
+      case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
+                                               VT, OpVT);
       case ISD::SETLE:  if (R==APFloat::cmpUnordered)
                           return getUNDEF(VT);
                         LLVM_FALLTHROUGH;
-      case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
-                                           R==APFloat::cmpEqual, dl, VT);
+      case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
+                                               R==APFloat::cmpEqual, dl, VT,
+                                               OpVT);
       case ISD::SETGE:  if (R==APFloat::cmpUnordered)
                           return getUNDEF(VT);
                         LLVM_FALLTHROUGH;
-      case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
-                                           R==APFloat::cmpEqual, dl, VT);
-      case ISD::SETO:   return getConstant(R!=APFloat::cmpUnordered, dl, VT);
-      case ISD::SETUO:  return getConstant(R==APFloat::cmpUnordered, dl, VT);
-      case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
-                                           R==APFloat::cmpEqual, dl, VT);
-      case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT);
-      case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
-                                           R==APFloat::cmpLessThan, dl, VT);
-      case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
-                                           R==APFloat::cmpUnordered, dl, VT);
-      case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT);
-      case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT);
+      case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+                                           R==APFloat::cmpEqual, dl, VT, OpVT);
+      case ISD::SETO:   return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
+                                               OpVT);
+      case ISD::SETUO:  return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
+                                               OpVT);
+      case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
+                                               R==APFloat::cmpEqual, dl, VT,
+                                               OpVT);
+      case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
+                                               OpVT);
+      case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
+                                               R==APFloat::cmpLessThan, dl, VT,
+                                               OpVT);
+      case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+                                               R==APFloat::cmpUnordered, dl, VT,
+                                               OpVT);
+      case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
+                                               VT, OpVT);
+      case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
+                                               OpVT);
       }
     } else {
       // Ensure that the constant occurs on the RHS.
@@ -2297,10 +2396,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
       break;
     }
 
-    // Support big-endian targets when it becomes useful.
     bool IsLE = getDataLayout().isLittleEndian();
-    if (!IsLE)
-      break;
 
     // Bitcast 'small element' vector to 'large element' scalar/vector.
     if ((BitWidth % SubBitWidth) == 0) {
@@ -2319,8 +2415,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
       for (unsigned i = 0; i != SubScale; ++i) {
         computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
                          Depth + 1);
-        Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i);
-        Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i);
+        unsigned Shifts = IsLE ? i : SubScale - 1 - i;
+        Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts);
+        Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts);
       }
     }
 
@@ -2342,7 +2439,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
       Known.Zero.setAllBits(); Known.One.setAllBits();
       for (unsigned i = 0; i != NumElts; ++i)
         if (DemandedElts[i]) {
-          unsigned Offset = (i % SubScale) * BitWidth;
+          unsigned Shifts = IsLE ? i : NumElts - 1 - i;
+          unsigned Offset = (Shifts % SubScale) * BitWidth;
           Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
           Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
           // If we don't know any bits, early out.
@@ -2441,6 +2539,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
     break;
   case ISD::SMULO:
   case ISD::UMULO:
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
     if (Op.getResNo() != 1)
       break;
     // The boolean result conforms to getBooleanContents.
@@ -2904,11 +3003,38 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
   }
   case ISD::SMIN:
   case ISD::SMAX: {
-    computeKnownBits(Op.getOperand(0), Known, DemandedElts,
-                     Depth + 1);
-    // If we don't know any bits, early out.
-    if (Known.isUnknown())
-      break;
+    // If we have a clamp pattern, we know that the number of sign bits will be
+    // the minimum of the clamp min/max range.
+    bool IsMax = (Opcode == ISD::SMAX);
+    ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+      if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
+                                              DemandedElts);
+    if (CstLow && CstHigh) {
+      if (!IsMax)
+        std::swap(CstLow, CstHigh);
+
+      const APInt &ValueLow = CstLow->getAPIntValue();
+      const APInt &ValueHigh = CstHigh->getAPIntValue();
+      if (ValueLow.sle(ValueHigh)) {
+        unsigned LowSignBits = ValueLow.getNumSignBits();
+        unsigned HighSignBits = ValueHigh.getNumSignBits();
+        unsigned MinSignBits = std::min(LowSignBits, HighSignBits);
+        if (ValueLow.isNegative() && ValueHigh.isNegative()) {
+          Known.One.setHighBits(MinSignBits);
+          break;
+        }
+        if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) {
+          Known.Zero.setHighBits(MinSignBits);
+          break;
+        }
+      }
+    }
+
+    // Fallback - just get the shared known bits of the operands.
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    if (Known.isUnknown()) break; // Early-out
     computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
     Known.Zero &= Known2.Zero;
     Known.One &= Known2.One;
@@ -3038,7 +3164,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
   if (!DemandedElts)
     return 1;  // No demanded elts, better to assume we don't know anything.
 
-  switch (Op.getOpcode()) {
+  unsigned Opcode = Op.getOpcode();
+  switch (Opcode) {
   default: break;
   case ISD::AssertSext:
     Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
@@ -3189,7 +3316,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return std::min(Tmp, Tmp2);
 
   case ISD::SMIN:
-  case ISD::SMAX:
+  case ISD::SMAX: {
+    // If we have a clamp pattern, we know that the number of sign bits will be
+    // the minimum of the clamp min/max range.
+    bool IsMax = (Opcode == ISD::SMAX);
+    ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+      if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
+                                              DemandedElts);
+    if (CstLow && CstHigh) {
+      if (!IsMax)
+        std::swap(CstLow, CstHigh);
+      if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) {
+        Tmp = CstLow->getAPIntValue().getNumSignBits();
+        Tmp2 = CstHigh->getAPIntValue().getNumSignBits();
+        return std::min(Tmp, Tmp2);
+      }
+    }
+
+    // Fallback - just get the minimum number of sign bits of the operands.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+    if (Tmp == 1)
+      return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+    return std::min(Tmp, Tmp2);
+  }
   case ISD::UMIN:
   case ISD::UMAX:
     Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
@@ -3225,7 +3377,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
       unsigned RotAmt = C->getAPIntValue().urem(VTBits);
 
       // Handle rotate right by N like a rotate left by 32-N.
-      if (Op.getOpcode() == ISD::ROTR)
+      if (Opcode == ISD::ROTR)
         RotAmt = (VTBits - RotAmt) % VTBits;
 
       // If we aren't rotating out all of the known-in sign bits, return the
@@ -3423,10 +3575,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
   }
 
   // Allow the target to implement this method for its nodes.
-  if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
-      Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
-      Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
-      Op.getOpcode() == ISD::INTRINSIC_VOID) {
+  if (Opcode >= ISD::BUILTIN_OP_END ||
+      Opcode == ISD::INTRINSIC_WO_CHAIN ||
+      Opcode == ISD::INTRINSIC_W_CHAIN ||
+      Opcode == ISD::INTRINSIC_VOID) {
     unsigned NumBits =
         TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
     if (NumBits > 1)
@@ -3487,17 +3639,33 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
   return false;
 }
 
-bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
+  assert(Op.getValueType().isFloatingPoint() &&
+         "Floating point type expected");
+
   // If the value is a constant, we can obviously see if it is a zero or not.
+  // TODO: Add BuildVector support.
   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
     return !C->isZero();
+  return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+  assert(!Op.getValueType().isFloatingPoint() &&
+         "Floating point types unsupported - use isKnownNeverZeroFloat");
+
+  // If the value is a constant, we can obviously see if it is a zero or not.
+  if (ISD::matchUnaryPredicate(
+          Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))
+    return true;
 
   // TODO: Recognize more cases here.
   switch (Op.getOpcode()) {
   default: break;
   case ISD::OR:
-    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
-      return !C->isNullValue();
+    if (isKnownNeverZero(Op.getOperand(1)) ||
+        isKnownNeverZero(Op.getOperand(0)))
+      return true;
     break;
   }
 
@@ -3517,6 +3685,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
   return false;
 }
 
+// FIXME: unify with llvm::haveNoCommonBitsSet.
+// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)
 bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
   assert(A.getValueType() == B.getValueType() &&
          "Values must have the same type");
@@ -3841,11 +4011,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     else if (OpOpcode == ISD::UNDEF)
       return getUNDEF(VT);
 
-    // (ext (trunx x)) -> x
+    // (ext (trunc x)) -> x
     if (OpOpcode == ISD::TRUNCATE) {
       SDValue OpOp = Operand.getOperand(0);
-      if (OpOp.getValueType() == VT)
+      if (OpOp.getValueType() == VT) {
+        transferDbgValues(Operand, OpOp);
         return OpOp;
+      }
     }
     break;
   case ISD::TRUNCATE:
@@ -3921,10 +4093,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   case ISD::FNEG:
     // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
-    if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
-      // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
+    if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
+        OpOpcode == ISD::FSUB)
       return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
-                     Operand.getOperand(0), Operand.getNode()->getFlags());
+                     Operand.getOperand(0), Flags);
     if (OpOpcode == ISD::FNEG)  // --X -> X
       return Operand.getOperand(0);
     break;
@@ -4314,24 +4486,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM:
-    if (getTarget().Options.UnsafeFPMath) {
-      if (Opcode == ISD::FADD) {
-        // x+0 --> x
-        if (N2CFP && N2CFP->getValueAPF().isZero())
-          return N1;
-      } else if (Opcode == ISD::FSUB) {
-        // x-0 --> x
-        if (N2CFP && N2CFP->getValueAPF().isZero())
-          return N1;
-      } else if (Opcode == ISD::FMUL) {
-        // x*0 --> 0
-        if (N2CFP && N2CFP->isZero())
-          return N2;
-        // x*1 --> x
-        if (N2CFP && N2CFP->isExactlyValue(1.0))
-          return N1;
-      }
-    }
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
@@ -4448,12 +4602,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
   case ISD::EXTRACT_VECTOR_ELT:
+    assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
+           "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
+             element type of the vector.");
+
     // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
     if (N1.isUndef())
       return getUNDEF(VT);
 
     // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
-    if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+    if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
       return getUNDEF(VT);
 
     // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
@@ -4635,6 +4793,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     }
   }
 
+  // Any FP binop with an undef operand is folded to NaN. This matches the
+  // behavior of the IR optimizer.
+  switch (Opcode) {
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+    if (N1.isUndef() || N2.isUndef())
+      return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+  }
+
   // Canonicalize an UNDEF to the RHS, even over a constant.
   if (N1.isUndef()) {
     if (TLI->isCommutativeBinOp(Opcode)) {
@@ -4644,22 +4814,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       case ISD::FP_ROUND_INREG:
       case ISD::SIGN_EXTEND_INREG:
       case ISD::SUB:
-      case ISD::FSUB:
-      case ISD::FDIV:
-      case ISD::FREM:
-      case ISD::SRA:
-        return N1;     // fold op(undef, arg2) -> undef
+        return getUNDEF(VT);     // fold op(undef, arg2) -> undef
       case ISD::UDIV:
       case ISD::SDIV:
       case ISD::UREM:
       case ISD::SREM:
+      case ISD::SRA:
       case ISD::SRL:
       case ISD::SHL:
-        if (!VT.isVector())
-          return getConstant(0, DL, VT);    // fold op(undef, arg2) -> 0
-        // For vectors, we can't easily build an all zero vector, just return
-        // the LHS.
-        return N2;
+        return getConstant(0, DL, VT);    // fold op(undef, arg2) -> 0
       }
     }
   }
@@ -4681,32 +4844,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     case ISD::SDIV:
     case ISD::UREM:
     case ISD::SREM:
-      return N2;       // fold op(arg1, undef) -> undef
-    case ISD::FADD:
-    case ISD::FSUB:
-    case ISD::FMUL:
-    case ISD::FDIV:
-    case ISD::FREM:
-      if (getTarget().Options.UnsafeFPMath)
-        return N2;
-      break;
-    case ISD::MUL:
-    case ISD::AND:
+    case ISD::SRA:
     case ISD::SRL:
     case ISD::SHL:
-      if (!VT.isVector())
-        return getConstant(0, DL, VT);  // fold op(arg1, undef) -> 0
-      // For vectors, we can't easily build an all zero vector, just return
-      // the LHS.
-      return N1;
+      return getUNDEF(VT);       // fold op(arg1, undef) -> undef
+    case ISD::MUL:
+    case ISD::AND:
+      return getConstant(0, DL, VT);  // fold op(arg1, undef) -> 0
     case ISD::OR:
-      if (!VT.isVector())
-        return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
-      // For vectors, we can't easily build an all one vector, just return
-      // the LHS.
-      return N1;
-    case ISD::SRA:
-      return N1;
+      return getAllOnesConstant(DL, VT);
     }
   }
 
@@ -4739,10 +4885,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
-                              SDValue N1, SDValue N2, SDValue N3) {
+                              SDValue N1, SDValue N2, SDValue N3,
+                              const SDNodeFlags Flags) {
   // Perform various simplifications.
   switch (Opcode) {
   case ISD::FMA: {
+    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+    assert(N1.getValueType() == VT && N2.getValueType() == VT &&
+           N3.getValueType() == VT && "FMA types must match!");
     ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
     ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
     ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
@@ -4833,10 +4983,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops);
     void *IP = nullptr;
-    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+      E->intersectFlagsWith(Flags);
       return SDValue(E, 0);
+    }
 
     N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+    N->setFlags(Flags);
     createOperands(N, Ops);
     CSEMap.InsertNode(N, IP);
   } else {
@@ -5107,6 +5260,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
   return MF.getFunction().optForSize();
 }
 
+static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+                          SmallVector<SDValue, 32> &OutChains, unsigned From,
+                          unsigned To, SmallVector<SDValue, 16> &OutLoadChains,
+                          SmallVector<SDValue, 16> &OutStoreChains) {
+  assert(OutLoadChains.size() && "Missing loads in memcpy inlining");
+  assert(OutStoreChains.size() && "Missing stores in memcpy inlining");
+  SmallVector<SDValue, 16> GluedLoadChains;
+  for (unsigned i = From; i < To; ++i) {
+    OutChains.push_back(OutLoadChains[i]);
+    GluedLoadChains.push_back(OutLoadChains[i]);
+  }
+
+  // Chain for all loads.
+  SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                                  GluedLoadChains);
+
+  for (unsigned i = From; i < To; ++i) {
+    StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]);
+    SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(),
+                                  ST->getBasePtr(), ST->getMemoryVT(),
+                                  ST->getMemOperand());
+    OutChains.push_back(NewStore);
+  }
+}
+
 static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
                                        SDValue Chain, SDValue Dst, SDValue Src,
                                        uint64_t Size, unsigned Align,
@@ -5171,7 +5349,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
 
   MachineMemOperand::Flags MMOFlags =
       isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
-  SmallVector<SDValue, 8> OutChains;
+  SmallVector<SDValue, 16> OutLoadChains;
+  SmallVector<SDValue, 16> OutStoreChains;
+  SmallVector<SDValue, 32> OutChains;
   unsigned NumMemOps = MemOps.size();
   uint64_t SrcOff = 0, DstOff = 0;
   for (unsigned i = 0; i != NumMemOps; ++i) {
@@ -5205,11 +5385,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
         SubSlice.Length = VTSize;
       }
       Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
-      if (Value.getNode())
+      if (Value.getNode()) {
         Store = DAG.getStore(Chain, dl, Value,
                              DAG.getMemBasePlusOffset(Dst, DstOff, dl),
                              DstPtrInfo.getWithOffset(DstOff), Align,
                              MMOFlags);
+        OutChains.push_back(Store);
+      }
     }
 
     if (!Store.getNode()) {
@@ -5231,17 +5413,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
                              DAG.getMemBasePlusOffset(Src, SrcOff, dl),
                              SrcPtrInfo.getWithOffset(SrcOff), VT,
                              MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
-      OutChains.push_back(Value.getValue(1));
+      OutLoadChains.push_back(Value.getValue(1));
+
       Store = DAG.getTruncStore(
           Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
           DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+      OutStoreChains.push_back(Store);
     }
-    OutChains.push_back(Store);
     SrcOff += VTSize;
     DstOff += VTSize;
     Size -= VTSize;
   }
 
+  unsigned GluedLdStLimit = MaxLdStGlue == 0 ?
+                                TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue;
+  unsigned NumLdStInMemcpy = OutStoreChains.size();
+
+  if (NumLdStInMemcpy) {
+    // It may be that memcpy might be converted to memset if it's memcpy
+    // of constants. In such a case, we won't have loads and stores, but
+    // just stores. In the absence of loads, there is nothing to gang up.
+    if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) {
+      // If target does not care, just leave as it.
+      for (unsigned i = 0; i < NumLdStInMemcpy; ++i) {
+        OutChains.push_back(OutLoadChains[i]);
+        OutChains.push_back(OutStoreChains[i]);
+      }
+    } else {
+      // Ld/St less than/equal limit set by target.
+      if (NumLdStInMemcpy <= GluedLdStLimit) {
+          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+                                        NumLdStInMemcpy, OutLoadChains,
+                                        OutStoreChains);
+      } else {
+        unsigned NumberLdChain =  NumLdStInMemcpy / GluedLdStLimit;
+        unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit;
+        unsigned GlueIter = 0;
+
+        for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) {
+          unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit;
+          unsigned IndexTo   = NumLdStInMemcpy - GlueIter;
+
+          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo,
+                                       OutLoadChains, OutStoreChains);
+          GlueIter += GluedLdStLimit;
+        }
+
+        // Residual ld/st.
+        if (RemainingLdStInMemcpy) {
+          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+                                        RemainingLdStInMemcpy, OutLoadChains,
+                                        OutStoreChains);
+        }
+      }
+    }
+  }
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
 }
 
@@ -5334,7 +5560,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
 }
 
-/// \brief Lower the call to 'memset' intrinsic function into a series of store
+/// Lower the call to 'memset' intrinsic function into a series of store
 /// operations.
 ///
 /// \param DAG Selection DAG where lowered code is placed.
@@ -5518,6 +5744,47 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
   return CallResult.second;
 }
 
+SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
+                                      SDValue Dst, unsigned DstAlign,
+                                      SDValue Src, unsigned SrcAlign,
+                                      SDValue Size, Type *SizeTy,
+                                      unsigned ElemSz, bool isTailCall,
+                                      MachinePointerInfo DstPtrInfo,
+                                      MachinePointerInfo SrcPtrInfo) {
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+  Entry.Node = Dst;
+  Args.push_back(Entry);
+
+  Entry.Node = Src;
+  Args.push_back(Entry);
+
+  Entry.Ty = SizeTy;
+  Entry.Node = Size;
+  Args.push_back(Entry);
+
+  RTLIB::Libcall LibraryCall =
+      RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+  if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+    report_fatal_error("Unsupported element size");
+
+  TargetLowering::CallLoweringInfo CLI(*this);
+  CLI.setDebugLoc(dl)
+      .setChain(Chain)
+      .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+                    Type::getVoidTy(*getContext()),
+                    getExternalSymbol(TLI->getLibcallName(LibraryCall),
+                                      TLI->getPointerTy(getDataLayout())),
+                    std::move(Args))
+      .setDiscardResult()
+      .setTailCall(isTailCall);
+
+  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+  return CallResult.second;
+}
+
 SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
                                  SDValue Src, SDValue Size, unsigned Align,
                                  bool isVol, bool isTailCall,
@@ -5579,6 +5846,47 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
   return CallResult.second;
 }
 
+SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
+                                       SDValue Dst, unsigned DstAlign,
+                                       SDValue Src, unsigned SrcAlign,
+                                       SDValue Size, Type *SizeTy,
+                                       unsigned ElemSz, bool isTailCall,
+                                       MachinePointerInfo DstPtrInfo,
+                                       MachinePointerInfo SrcPtrInfo) {
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+  Entry.Node = Dst;
+  Args.push_back(Entry);
+
+  Entry.Node = Src;
+  Args.push_back(Entry);
+
+  Entry.Ty = SizeTy;
+  Entry.Node = Size;
+  Args.push_back(Entry);
+
+  RTLIB::Libcall LibraryCall =
+      RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+  if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+    report_fatal_error("Unsupported element size");
+
+  TargetLowering::CallLoweringInfo CLI(*this);
+  CLI.setDebugLoc(dl)
+      .setChain(Chain)
+      .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+                    Type::getVoidTy(*getContext()),
+                    getExternalSymbol(TLI->getLibcallName(LibraryCall),
+                                      TLI->getPointerTy(getDataLayout())),
+                    std::move(Args))
+      .setDiscardResult()
+      .setTailCall(isTailCall);
+
+  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+  return CallResult.second;
+}
+
 SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
                                 SDValue Src, SDValue Size, unsigned Align,
                                 bool isVol, bool isTailCall,
@@ -5641,6 +5949,46 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   return CallResult.second;
 }
 
+SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
+                                      SDValue Dst, unsigned DstAlign,
+                                      SDValue Value, SDValue Size, Type *SizeTy,
+                                      unsigned ElemSz, bool isTailCall,
+                                      MachinePointerInfo DstPtrInfo) {
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+  Entry.Node = Dst;
+  Args.push_back(Entry);
+
+  Entry.Ty = Type::getInt8Ty(*getContext());
+  Entry.Node = Value;
+  Args.push_back(Entry);
+
+  Entry.Ty = SizeTy;
+  Entry.Node = Size;
+  Args.push_back(Entry);
+
+  RTLIB::Libcall LibraryCall =
+      RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+  if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+    report_fatal_error("Unsupported element size");
+
+  TargetLowering::CallLoweringInfo CLI(*this);
+  CLI.setDebugLoc(dl)
+      .setChain(Chain)
+      .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+                    Type::getVoidTy(*getContext()),
+                    getExternalSymbol(TLI->getLibcallName(LibraryCall),
+                                      TLI->getPointerTy(getDataLayout())),
+                    std::move(Args))
+      .setDiscardResult()
+      .setTailCall(isTailCall);
+
+  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+  return CallResult.second;
+}
+
 SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
                                 SDVTList VTList, ArrayRef<SDValue> Ops,
                                 MachineMemOperand *MMO) {
@@ -5736,6 +6084,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
   assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
           Opcode == ISD::ATOMIC_LOAD_SUB ||
           Opcode == ISD::ATOMIC_LOAD_AND ||
+          Opcode == ISD::ATOMIC_LOAD_CLR ||
           Opcode == ISD::ATOMIC_LOAD_OR ||
           Opcode == ISD::ATOMIC_LOAD_XOR ||
           Opcode == ISD::ATOMIC_LOAD_NAND ||
@@ -6207,7 +6556,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
 SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
                                       ArrayRef<SDValue> Ops,
                                       MachineMemOperand *MMO) {
-  assert(Ops.size() == 5 && "Incompatible number of operands");
+  assert(Ops.size() == 6 && "Incompatible number of operands");
 
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
@@ -6233,6 +6582,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
   assert(N->getIndex().getValueType().getVectorNumElements() ==
              N->getValueType(0).getVectorNumElements() &&
          "Vector width mismatch between index and data");
+  assert(isa<ConstantSDNode>(N->getScale()) &&
+         cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+         "Scale should be a constant power of 2");
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
@@ -6244,7 +6596,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
 SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
                                        ArrayRef<SDValue> Ops,
                                        MachineMemOperand *MMO) {
-  assert(Ops.size() == 5 && "Incompatible number of operands");
+  assert(Ops.size() == 6 && "Incompatible number of operands");
 
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
@@ -6267,6 +6619,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
   assert(N->getIndex().getValueType().getVectorNumElements() ==
              N->getValue().getValueType().getVectorNumElements() &&
          "Vector width mismatch between index and data");
+  assert(isa<ConstantSDNode>(N->getScale()) &&
+         cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+         "Scale should be a constant power of 2");
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
@@ -6558,6 +6913,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
   // Now we update the operands.
   N->OperandList[0].set(Op);
 
+  updateDivergence(N);
   // If this gets put into a CSE map, add it.
   if (InsertPos) CSEMap.InsertNode(N, InsertPos);
   return N;
@@ -6586,6 +6942,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
   if (N->OperandList[1] != Op2)
     N->OperandList[1].set(Op2);
 
+  updateDivergence(N);
   // If this gets put into a CSE map, add it.
   if (InsertPos) CSEMap.InsertNode(N, InsertPos);
   return N;
@@ -6636,6 +6993,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
     if (N->OperandList[i] != Ops[i])
       N->OperandList[i].set(Ops[i]);
 
+  updateDivergence(N);
   // If this gets put into a CSE map, add it.
   if (InsertPos) CSEMap.InsertNode(N, InsertPos);
   return N;
@@ -7061,11 +7419,24 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
 /// FrameIndex
 SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
                                                 DIExpression *Expr, unsigned FI,
+                                                bool IsIndirect,
                                                 const DebugLoc &DL,
                                                 unsigned O) {
   assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
-  return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O);
+  return new (DbgInfo->getAlloc())
+      SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX);
+}
+
+/// VReg
+SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var,
+                                          DIExpression *Expr,
+                                          unsigned VReg, bool IsIndirect,
+                                          const DebugLoc &DL, unsigned O) {
+  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+         "Expected inlined-at fields to agree");
+  return new (DbgInfo->getAlloc())
+      SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG);
 }
 
 void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
@@ -7155,8 +7526,9 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
                         DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
         ClonedDVs.push_back(Clone);
         DV->setIsInvalidated();
-        DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
-              dbgs() << " into " << *DIExpr << '\n');
+        LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
+                   N0.getNode()->dumprFull(this);
+                   dbgs() << " into " << *DIExpr << '\n');
       }
     }
   }
@@ -7165,6 +7537,14 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
     AddDbgValue(Dbg, Dbg->getSDNode(), false);
 }
 
+/// Creates a SDDbgLabel node.
+SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label,
+                                      const DebugLoc &DL, unsigned O) {
+  assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+         "Expected inlined-at fields to agree");
+  return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O);
+}
+
 namespace {
 
 /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
@@ -7227,8 +7607,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
       SDUse &Use = UI.getUse();
       ++UI;
       Use.set(To);
+      if (To->isDivergent() != From->isDivergent())
+        updateDivergence(User);
     } while (UI != UE && *UI == User);
-
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User);
@@ -7282,6 +7663,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
       SDUse &Use = UI.getUse();
       ++UI;
       Use.setNode(To);
+      if (To->isDivergent() != From->isDivergent())
+        updateDivergence(User);
     } while (UI != UE && *UI == User);
 
     // Now that we have modified User, add it back to the CSE maps.  If it
@@ -7326,8 +7709,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
       const SDValue &ToOp = To[Use.getResNo()];
       ++UI;
       Use.set(ToOp);
+      if (To->getNode()->isDivergent() != From->isDivergent())
+        updateDivergence(User);
     } while (UI != UE && *UI == User);
-
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User);
@@ -7385,8 +7769,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
 
       ++UI;
       Use.set(To);
+      if (To->isDivergent() != From->isDivergent())
+        updateDivergence(User);
     } while (UI != UE && *UI == User);
-
     // We are iterating over all uses of the From node, so if a use
     // doesn't use the specific value, no changes are made.
     if (!UserRemovedFromCSEMaps)
@@ -7419,6 +7804,72 @@ namespace {
 
 } // end anonymous namespace
 
+void SelectionDAG::updateDivergence(SDNode * N)
+{
+  if (TLI->isSDNodeAlwaysUniform(N))
+    return;
+  bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+  for (auto &Op : N->ops()) {
+    if (Op.Val.getValueType() != MVT::Other)
+      IsDivergent |= Op.getNode()->isDivergent();
+  }
+  if (N->SDNodeBits.IsDivergent != IsDivergent) {
+    N->SDNodeBits.IsDivergent = IsDivergent;
+    for (auto U : N->uses()) {
+      updateDivergence(U);
+    }
+  }
+}
+
+
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
+  DenseMap<SDNode *, unsigned> Degree;
+  Order.reserve(AllNodes.size());
+  for (auto & N : allnodes()) {
+    unsigned NOps = N.getNumOperands();
+    Degree[&N] = NOps;
+    if (0 == NOps)
+      Order.push_back(&N);
+  }
+  for (std::vector<SDNode *>::iterator I = Order.begin();
+  I!=Order.end();++I) {
+    SDNode * N = *I;
+    for (auto U : N->uses()) {
+      unsigned &UnsortedOps = Degree[U];
+      if (0 == --UnsortedOps)
+        Order.push_back(U);
+    }
+  }
+}
+
+void SelectionDAG::VerifyDAGDiverence()
+{
+  std::vector<SDNode*> TopoOrder;
+  CreateTopologicalOrder(TopoOrder);
+  const TargetLowering &TLI = getTargetLoweringInfo();
+  DenseMap<const SDNode *, bool> DivergenceMap;
+  for (auto &N : allnodes()) {
+    DivergenceMap[&N] = false;
+  }
+  for (auto N : TopoOrder) {
+    bool IsDivergent = DivergenceMap[N];
+    bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
+    for (auto &Op : N->ops()) {
+      if (Op.Val.getValueType() != MVT::Other)
+        IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
+    }
+    if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) {
+      DivergenceMap[N] = true;
+    }
+  }
+  for (auto &N : allnodes()) {
+    (void)N;
+    assert(DivergenceMap[&N] == N.isDivergent() &&
+           "Divergence bit inconsistency detected\n");
+  }
+}
+
+
 /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
 /// uses of other values produced by From.getNode() alone.  The same value
 /// may appear in both the From and To list.  The Deleted vector is
@@ -7450,7 +7901,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
   }
 
   // Sort the uses, so that all the uses from a given User are together.
-  std::sort(Uses.begin(), Uses.end());
+  llvm::sort(Uses.begin(), Uses.end());
 
   for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
        UseIndex != UseIndexEnd; ) {
@@ -7579,6 +8030,10 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
   DbgInfo->add(DB, SD, isParameter);
 }
 
+void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) {
+  DbgInfo->add(DB);
+}
+
 SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
                                                    SDValue NewMemOp) {
   assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
@@ -7947,11 +8402,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
   if (VT.getSizeInBits() / 8 != Bytes)
     return false;
 
-  SDValue Loc = LD->getOperand(1);
-  SDValue BaseLoc = Base->getOperand(1);
-
-  auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
-  auto LocDecomp = BaseIndexOffset::match(Loc, *this);
+  auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
+  auto LocDecomp = BaseIndexOffset::match(LD, *this);
 
   int64_t Offset = 0;
   if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
@@ -7966,8 +8418,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
   const GlobalValue *GV;
   int64_t GVOffset = 0;
   if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
-    unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
-    KnownBits Known(PtrWidth);
+    unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType());
+    KnownBits Known(IdxWidth);
     llvm::computeKnownBits(GV, Known, getDataLayout());
     unsigned AlignBits = Known.countMinTrailingZeros();
     unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
@@ -8201,7 +8653,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
   return true;
 }
 
-// \brief Returns the SDNode if it is a constant integer BuildVector
+// Returns the SDNode if it is a constant integer BuildVector
 // or constant integer.
 SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
   if (isa<ConstantSDNode>(N))
@@ -8227,6 +8679,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
   return nullptr;
 }
 
+void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
+  assert(!Node->OperandList && "Node already has operands");
+  SDUse *Ops = OperandRecycler.allocate(
+    ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+
+  bool IsDivergent = false;
+  for (unsigned I = 0; I != Vals.size(); ++I) {
+    Ops[I].setUser(Node);
+    Ops[I].setInitial(Vals[I]);
+    if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence.
+      IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent();
+  }
+  Node->NumOperands = Vals.size();
+  Node->OperandList = Ops;
+  IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
+  if (!TLI->isSDNodeAlwaysUniform(Node))
+    Node->SDNodeBits.IsDivergent = IsDivergent;
+  checkForCycles(Node);
+}
+
 #ifndef NDEBUG
 static void checkForCyclesHelper(const SDNode *N,
                                  SmallPtrSetImpl<const SDNode*> &Visited,