diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000 |
commit | 01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch) | |
tree | 4def12e759965de927d963ac65840d663ef9d1ea /include/llvm/CodeGen | |
parent | f0f4822ed4b66e3579e92a89f368f8fb860e218e (diff) |
Diffstat (limited to 'include/llvm/CodeGen')
78 files changed, 4836 insertions, 1835 deletions
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h index 38e64ad3be29a..2e4dc49a1e26b 100644 --- a/include/llvm/CodeGen/Analysis.h +++ b/include/llvm/CodeGen/Analysis.h @@ -17,10 +17,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/Support/CodeGen.h" namespace llvm { class GlobalValue; diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index cf29fc9ef889f..de618d1735733 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -34,6 +34,7 @@ class ConstantArray; class DIE; class DIEAbbrev; class GCMetadataPrinter; +class GlobalIndirectSymbol; class GlobalValue; class GlobalVariable; class MachineBasicBlock; @@ -147,6 +148,8 @@ public: DwarfDebug *getDwarfDebug() { return DD; } DwarfDebug *getDwarfDebug() const { return DD; } + bool isPositionIndependent() const; + /// Return true if assembly output should contain comments. /// bool isVerbose() const { return VerboseAsm; } @@ -238,11 +241,6 @@ public: /// virtual void EmitJumpTableInfo(); - /// Emit the control variable for an emulated TLS variable. - virtual void EmitEmulatedTLSControlVariable(const GlobalVariable *GV, - MCSymbol *EmittedSym, - bool AllZeroInitValue); - /// Emit the specified global variable to the .s file. virtual void EmitGlobalVariable(const GlobalVariable *GV); @@ -551,6 +549,9 @@ private: void EmitXXStructorList(const DataLayout &DL, const Constant *List, bool isCtor); GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C); + /// Emit GlobalAlias or GlobalIFunc. 
+ void emitGlobalIndirectSymbol(Module &M, + const GlobalIndirectSymbol& GIS); }; } diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index d99054eb6f368..69951afb623c3 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -105,6 +105,11 @@ public: /// \name Scalar TTI Implementations /// @{ + bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace, + unsigned Alignment, bool *Fast) const { + MVT M = MVT::getIntegerVT(BitWidth); + return getTLI()->allowsMisalignedMemoryAccesses(M, AddressSpace, Alignment, Fast); + } bool hasBranchDivergence() { return false; } @@ -152,6 +157,11 @@ public: return getTLI()->isTypeLegal(VT); } + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands) { + return BaseT::getGEPCost(PointeeType, Ptr, Operands); + } + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) { return BaseT::getIntrinsicCost(IID, RetTy, Arguments); @@ -216,6 +226,8 @@ public: return BaseT::getOperationCost(Opcode, Ty, OpTy); } + unsigned getInliningThresholdMultiplier() { return 1; } + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) { // This unrolling functionality is target independent, but to provide some // motivation for its intended use, for x86: @@ -307,12 +319,14 @@ public: } if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. + // If the operation is custom lowered, then assume that the code is twice + // as expensive. return LT.first * 2 * OpCost; } // Else, assume that we need to scalarize this op. + // TODO: If one of the types get legalized by splitting, handle this + // similarly to what getCastInstrCost() does. 
if (Ty->isVectorTy()) { unsigned Num = Ty->getVectorNumElements(); unsigned Cost = static_cast<T *>(this) @@ -359,6 +373,11 @@ public: TLI->isZExtFree(SrcLT.second, DstLT.second)) return 0; + if (Opcode == Instruction::AddrSpaceCast && + TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(), + Dst->getPointerAddressSpace())) + return 0; + // If the cast is marked as legal (or promote) then assume low cost. if (SrcLT.first == DstLT.first && TLI->isOperationLegalOrPromote(ISD, DstLT.second)) @@ -402,9 +421,25 @@ public: return SrcLT.first * 1; } - // If we are converting vectors and the operation is illegal, or - // if the vectors are legalized to different types, estimate the - // scalarization costs. + // If we are legalizing by splitting, query the concrete TTI for the cost + // of casting the original vector twice. We also need to factor int the + // cost of the split itself. Count that as 1, to be consistent with + // TLI->getTypeLegalizationCost(). + if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == + TargetLowering::TypeSplitVector) || + (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == + TargetLowering::TypeSplitVector)) { + Type *SplitDst = VectorType::get(Dst->getVectorElementType(), + Dst->getVectorNumElements() / 2); + Type *SplitSrc = VectorType::get(Src->getVectorElementType(), + Src->getVectorNumElements() / 2); + T *TTI = static_cast<T *>(this); + return TTI->getVectorSplitCost() + + (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc)); + } + + // In other cases where the source or destination are illegal, assume + // the operation will get scalarized. 
unsigned Num = Dst->getVectorNumElements(); unsigned Cost = static_cast<T *>(this)->getCastInstrCost( Opcode, Dst->getScalarType(), Src->getScalarType()); @@ -428,6 +463,14 @@ public: llvm_unreachable("Unhandled cast"); } + unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, unsigned Index) { + return static_cast<T *>(this)->getVectorInstrCost( + Instruction::ExtractElement, VecTy, Index) + + static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, + VecTy->getElementType()); + } + unsigned getCFInstrCost(unsigned Opcode) { // Branches are assumed to be predicted. return 0; @@ -454,6 +497,8 @@ public: } // Otherwise, assume that the cast is scalarized. + // TODO: If one of the types get legalized by splitting, handle this + // similarly to what getCastInstrCost() does. if (ValTy->isVectorTy()) { unsigned Num = ValTy->getVectorNumElements(); if (CondTy) @@ -462,8 +507,7 @@ public: Opcode, ValTy->getScalarType(), CondTy); // Return the cost of multiple scalar invocation plus the cost of - // inserting - // and extracting the values. + // inserting and extracting the values. return getScalarizationOverhead(ValTy, true, false) + Num * Cost; } @@ -527,6 +571,51 @@ public: unsigned Cost = static_cast<T *>(this)->getMemoryOpCost( Opcode, VecTy, Alignment, AddressSpace); + // Legalize the vector type, and get the legalized and unlegalized type + // sizes. + MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; + unsigned VecTySize = + static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); + unsigned VecTyLTSize = VecTyLT.getStoreSize(); + + // Return the ceiling of dividing A by B. + auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; + + // Scale the cost of the memory operation by the fraction of legalized + // instructions that will actually be used. We shouldn't account for the + // cost of dead instructions since they will be removed. 
+ // + // E.g., An interleaved load of factor 8: + // %vec = load <16 x i64>, <16 x i64>* %ptr + // %v0 = shufflevector %vec, undef, <0, 8> + // + // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be + // used (those corresponding to elements [0:1] and [8:9] of the unlegalized + // type). The other loads are unused. + // + // We only scale the cost of loads since interleaved store groups aren't + // allowed to have gaps. + if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { + + // The number of loads of a legal type it will take to represent a load + // of the unlegalized vector type. + unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); + + // The number of elements of the unlegalized type that correspond to a + // single legal instruction. + unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); + + // Determine which legal instructions will be used. + BitVector UsedInsts(NumLegalInsts, false); + for (unsigned Index : Indices) + for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) + UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); + + // Scale the cost of the load by the fraction of legal instructions that + // will be used. + Cost *= UsedInsts.count() / NumLegalInsts; + } + // Then plus the cost of interleave operation. 
if (Opcode == Instruction::Load) { // The interleave cost is similar to extract sub vectors' elements @@ -582,13 +671,14 @@ public: /// Get intrinsic cost based on arguments unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Value *> Args) { + ArrayRef<Value *> Args, FastMathFlags FMF) { switch (IID) { default: { SmallVector<Type *, 4> Types; for (Value *Op : Args) Types.push_back(Op->getType()); - return getIntrinsicInstrCost(IID, RetTy, Types); + return static_cast<T *>(this)->getIntrinsicInstrCost(IID, RetTy, Types, + FMF); } case Intrinsic::masked_scatter: { Value *Mask = Args[3]; @@ -614,8 +704,9 @@ public: /// Get intrinsic cost based on argument types unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> Tys) { - unsigned ISD = 0; + ArrayRef<Type *> Tys, FastMathFlags FMF) { + SmallVector<unsigned, 2> ISDs; + unsigned SingleCallCost = 10; // Library call cost. Make it expensive. switch (IID) { default: { // Assume that we need to scalarize this intrinsic. @@ -641,74 +732,78 @@ public: return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost( - IID, ScalarRetTy, ScalarTys); + IID, ScalarRetTy, ScalarTys, FMF); return ScalarCalls * ScalarCost + ScalarizationCost; } // Look for intrinsics that can be lowered directly or turned into a scalar // intrinsic call. 
case Intrinsic::sqrt: - ISD = ISD::FSQRT; + ISDs.push_back(ISD::FSQRT); break; case Intrinsic::sin: - ISD = ISD::FSIN; + ISDs.push_back(ISD::FSIN); break; case Intrinsic::cos: - ISD = ISD::FCOS; + ISDs.push_back(ISD::FCOS); break; case Intrinsic::exp: - ISD = ISD::FEXP; + ISDs.push_back(ISD::FEXP); break; case Intrinsic::exp2: - ISD = ISD::FEXP2; + ISDs.push_back(ISD::FEXP2); break; case Intrinsic::log: - ISD = ISD::FLOG; + ISDs.push_back(ISD::FLOG); break; case Intrinsic::log10: - ISD = ISD::FLOG10; + ISDs.push_back(ISD::FLOG10); break; case Intrinsic::log2: - ISD = ISD::FLOG2; + ISDs.push_back(ISD::FLOG2); break; case Intrinsic::fabs: - ISD = ISD::FABS; + ISDs.push_back(ISD::FABS); break; case Intrinsic::minnum: - ISD = ISD::FMINNUM; + ISDs.push_back(ISD::FMINNUM); + if (FMF.noNaNs()) + ISDs.push_back(ISD::FMINNAN); break; case Intrinsic::maxnum: - ISD = ISD::FMAXNUM; + ISDs.push_back(ISD::FMAXNUM); + if (FMF.noNaNs()) + ISDs.push_back(ISD::FMAXNAN); break; case Intrinsic::copysign: - ISD = ISD::FCOPYSIGN; + ISDs.push_back(ISD::FCOPYSIGN); break; case Intrinsic::floor: - ISD = ISD::FFLOOR; + ISDs.push_back(ISD::FFLOOR); break; case Intrinsic::ceil: - ISD = ISD::FCEIL; + ISDs.push_back(ISD::FCEIL); break; case Intrinsic::trunc: - ISD = ISD::FTRUNC; + ISDs.push_back(ISD::FTRUNC); break; case Intrinsic::nearbyint: - ISD = ISD::FNEARBYINT; + ISDs.push_back(ISD::FNEARBYINT); break; case Intrinsic::rint: - ISD = ISD::FRINT; + ISDs.push_back(ISD::FRINT); break; case Intrinsic::round: - ISD = ISD::FROUND; + ISDs.push_back(ISD::FROUND); break; case Intrinsic::pow: - ISD = ISD::FPOW; + ISDs.push_back(ISD::FPOW); break; case Intrinsic::fma: - ISD = ISD::FMA; + ISDs.push_back(ISD::FMA); break; case Intrinsic::fmuladd: - ISD = ISD::FMA; + ISDs.push_back(ISD::FMA); break; // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. 
case Intrinsic::lifetime_start: @@ -720,27 +815,49 @@ public: case Intrinsic::masked_load: return static_cast<T *>(this) ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); + case Intrinsic::ctpop: + ISDs.push_back(ISD::CTPOP); + // In case of legalization use TCC_Expensive. This is cheaper than a + // library call but still not a cheap instruction. + SingleCallCost = TargetTransformInfo::TCC_Expensive; + break; + // FIXME: ctlz, cttz, ... } const TargetLoweringBase *TLI = getTLI(); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; - } + SmallVector<unsigned, 2> LegalCost; + SmallVector<unsigned, 2> CustomCost; + for (unsigned ISD : ISDs) { + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { + if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) { + return 0; + } - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. - return LT.first * 2; + // The operation is legal. Assume it costs 1. + // If the type is split to multiple registers, assume that there is some + // overhead to this. + // TODO: Once we have extract/insert subvector cost we need to use them. + if (LT.first > 1) + LegalCost.push_back(LT.first * 2); + else + LegalCost.push_back(LT.first * 1); + } else if (!TLI->isOperationExpand(ISD, LT.second)) { + // If the operation is custom lowered then assume + // that the code is twice as expensive. 
+ CustomCost.push_back(LT.first * 2); + } } + auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); + if (MinLegalCostI != LegalCost.end()) + return *MinLegalCostI; + + auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end()); + if (MinCustomCostI != CustomCost.end()) + return *MinCustomCostI; + // If we can't lower fmuladd into an FMA estimate the cost as a floating // point mul followed by an add. if (IID == Intrinsic::fmuladd) @@ -763,7 +880,7 @@ public: ScalarTys.push_back(Ty); } unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost( - IID, RetTy->getScalarType(), ScalarTys); + IID, RetTy->getScalarType(), ScalarTys, FMF); for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { if (Tys[i]->isVectorTy()) { ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); @@ -775,7 +892,7 @@ public: } // This is going to be turned into a library call, make it expensive. - return 10; + return SingleCallCost; } /// \brief Compute a cost of the given call instruction. @@ -815,6 +932,8 @@ public: return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); } + unsigned getVectorSplitCost() { return 1; } + /// @} }; diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 415abb90da576..92e58564e0408 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -195,6 +195,7 @@ class CCState { private: CallingConv::ID CallingConv; bool IsVarArg; + bool AnalyzingMustTailForwardedRegs = false; MachineFunction &MF; const TargetRegisterInfo &TRI; SmallVectorImpl<CCValAssign> &Locs; @@ -281,7 +282,7 @@ public: /// be able to store all arguments and such that the alignment requirement /// of each of the arguments is satisfied. 
unsigned getAlignedCallFrameSize() const { - return RoundUpToAlignment(StackOffset, MaxStackArgAlign); + return alignTo(StackOffset, MaxStackArgAlign); } /// isAllocated - Return true if the specified register (or an alias) is @@ -412,14 +413,19 @@ public: /// and alignment. unsigned AllocateStack(unsigned Size, unsigned Align) { assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2. - StackOffset = RoundUpToAlignment(StackOffset, Align); + StackOffset = alignTo(StackOffset, Align); unsigned Result = StackOffset; StackOffset += Size; MaxStackArgAlign = std::max(Align, MaxStackArgAlign); - MF.getFrameInfo()->ensureMaxAlignment(Align); + ensureMaxAlignment(Align); return Result; } + void ensureMaxAlignment(unsigned Align) { + if (!AnalyzingMustTailForwardedRegs) + MF.getFrameInfo()->ensureMaxAlignment(Align); + } + /// Version of AllocateStack with extra register to be shadowed. unsigned AllocateStack(unsigned Size, unsigned Align, unsigned ShadowReg) { MarkAllocated(ShadowReg); @@ -507,6 +513,14 @@ public: SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes, CCAssignFn Fn); + /// Returns true if the results of the two calling conventions are compatible. + /// This is usually part of the check for tailcall eligibility. + static bool resultsCompatible(CallingConv::ID CalleeCC, + CallingConv::ID CallerCC, MachineFunction &MF, + LLVMContext &C, + const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn CalleeFn, CCAssignFn CallerFn); + private: /// MarkAllocated - Mark a register and all of its aliases as allocated. 
void MarkAllocated(unsigned Reg); diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index 0d37dc00422fa..6376c06768b3a 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -46,20 +46,23 @@ MAttrs("mattr", cl::desc("Target specific attributes (-mattr=help for details)"), cl::value_desc("a1,+a2,-a3,...")); -cl::opt<Reloc::Model> -RelocModel("relocation-model", - cl::desc("Choose relocation model"), - cl::init(Reloc::Default), - cl::values( - clEnumValN(Reloc::Default, "default", - "Target default relocation model"), - clEnumValN(Reloc::Static, "static", - "Non-relocatable code"), - clEnumValN(Reloc::PIC_, "pic", - "Fully relocatable, position independent code"), - clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", - "Relocatable external references, non-relocatable code"), - clEnumValEnd)); +cl::opt<Reloc::Model> RelocModel( + "relocation-model", cl::desc("Choose relocation model"), + cl::values( + clEnumValN(Reloc::Static, "static", "Non-relocatable code"), + clEnumValN(Reloc::PIC_, "pic", + "Fully relocatable, position independent code"), + clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", + "Relocatable external references, non-relocatable code"), + clEnumValEnd)); + +static inline Optional<Reloc::Model> getRelocModel() { + if (RelocModel.getNumOccurrences()) { + Reloc::Model R = RelocModel; + return R; + } + return None; +} cl::opt<ThreadModel::Model> TMModel("thread-model", @@ -87,6 +90,22 @@ CMModel("code-model", "Large code model"), clEnumValEnd)); +cl::opt<llvm::ExceptionHandling> +ExceptionModel("exception-model", + cl::desc("exception model"), + cl::init(ExceptionHandling::None), + cl::values(clEnumValN(ExceptionHandling::None, "default", + "default exception handling model"), + clEnumValN(ExceptionHandling::DwarfCFI, "dwarf", + "DWARF-like CFI based exception handling"), + clEnumValN(ExceptionHandling::SjLj, "sjlj", + "SjLj exception handling"), + 
clEnumValN(ExceptionHandling::ARM, "arm", + "ARM EHABI exceptions"), + clEnumValN(ExceptionHandling::WinEH, "wineh", + "Windows exception model"), + clEnumValEnd)); + cl::opt<TargetMachine::CodeGenFileType> FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile), cl::desc("Choose a file type (not all types are supported by all targets):"), @@ -177,6 +196,11 @@ DisableTailCalls("disable-tail-calls", cl::desc("Never emit tail calls"), cl::init(false)); +cl::opt<bool> +StackSymbolOrdering("stack-symbol-ordering", + cl::desc("Order local stack symbols."), + cl::init(true)); + cl::opt<unsigned> OverrideStackAlignment("stack-alignment", cl::desc("Override default stack alignment"), @@ -193,11 +217,6 @@ TrapFuncName("trap-func", cl::Hidden, cl::init("")); cl::opt<bool> -EnablePIE("enable-pie", - cl::desc("Assume the creation of a position independent executable."), - cl::init(false)); - -cl::opt<bool> UseCtors("use-ctors", cl::desc("Use .ctors instead of .init_array."), cl::init(false)); @@ -211,10 +230,6 @@ cl::opt<std::string> StartAfter("start-after", cl::value_desc("pass-name"), cl::init("")); -cl::opt<std::string> - RunPass("run-pass", cl::desc("Run compiler only for one specific pass"), - cl::value_desc("pass-name"), cl::init("")); - cl::opt<bool> DataSections("data-sections", cl::desc("Emit data into separate sections"), cl::init(false)); @@ -284,12 +299,13 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { Options.NoZerosInBSS = DontPlaceZerosInBSS; Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt; Options.StackAlignmentOverride = OverrideStackAlignment; - Options.PositionIndependentExecutable = EnablePIE; + Options.StackSymbolOrdering = StackSymbolOrdering; Options.UseInitArray = !UseCtors; Options.DataSections = DataSections; Options.FunctionSections = FunctionSections; Options.UniqueSectionNames = UniqueSectionNames; Options.EmulatedTLS = EmulatedTLS; + Options.ExceptionModel = ExceptionModel; Options.MCOptions = 
InitMCTargetOptionsFromFlags(); Options.JTType = JTableType; diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index 40ec201107e8e..8de140e91bf37 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -28,6 +28,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" #include <map> namespace llvm { @@ -80,7 +81,7 @@ private: // CachedTable is a map from <FromState, Input> to ToState. DenseMap<UnsignPair, unsigned> CachedTable; - // ReadTable - Read the DFA transition table and update CachedTable. + // Read the DFA transition table and update CachedTable. void ReadTable(unsigned state); public: @@ -92,38 +93,39 @@ public: CurrentState = 0; } - // getInsnInput - Return the DFAInput for an instruction class. + // Return the DFAInput for an instruction class. DFAInput getInsnInput(unsigned InsnClass); - // getInsnInput - Return the DFAInput for an instruction class input vector. + // Return the DFAInput for an instruction class input vector. static DFAInput getInsnInput(const std::vector<unsigned> &InsnClass); - // canReserveResources - Check if the resources occupied by a MCInstrDesc - // are available in the current state. + // Check if the resources occupied by a MCInstrDesc are available in + // the current state. bool canReserveResources(const llvm::MCInstrDesc *MID); - // reserveResources - Reserve the resources occupied by a MCInstrDesc and - // change the current state to reflect that change. + // Reserve the resources occupied by a MCInstrDesc and change the current + // state to reflect that change. void reserveResources(const llvm::MCInstrDesc *MID); - // canReserveResources - Check if the resources occupied by a machine - // instruction are available in the current state. 
- bool canReserveResources(llvm::MachineInstr *MI); + // Check if the resources occupied by a machine instruction are available + // in the current state. + bool canReserveResources(llvm::MachineInstr &MI); - // reserveResources - Reserve the resources occupied by a machine - // instruction and change the current state to reflect that change. - void reserveResources(llvm::MachineInstr *MI); + // Reserve the resources occupied by a machine instruction and change the + // current state to reflect that change. + void reserveResources(llvm::MachineInstr &MI); const InstrItineraryData *getInstrItins() const { return InstrItins; } }; -// VLIWPacketizerList - Implements a simple VLIW packetizer using DFA. The -// packetizer works on machine basic blocks. For each instruction I in BB, the -// packetizer consults the DFA to see if machine resources are available to -// execute I. If so, the packetizer checks if I depends on any instruction J in -// the current packet. If no dependency is found, I is added to current packet -// and machine resource is marked as taken. If any dependency is found, a target -// API call is made to prune the dependence. + +// VLIWPacketizerList implements a simple VLIW packetizer using DFA. The +// packetizer works on machine basic blocks. For each instruction I in BB, +// the packetizer consults the DFA to see if machine resources are available +// to execute I. If so, the packetizer checks if I depends on any instruction +// in the current packet. If no dependency is found, I is added to current +// packet and the machine resource is marked as taken. If any dependency is +// found, a target API call is made to prune the dependence. class VLIWPacketizerList { protected: MachineFunction &MF; @@ -132,13 +134,11 @@ protected: // The VLIW Scheduler. DefaultVLIWScheduler *VLIWScheduler; - // Vector of instructions assigned to the current packet. std::vector<MachineInstr*> CurrentPacketMIs; // DFA resource tracker. 
DFAPacketizer *ResourceTracker; - - // Generate MI -> SU map. + // Map: MI -> SU. std::map<MachineInstr*, SUnit*> MIToSUnit; public: @@ -148,43 +148,40 @@ public: virtual ~VLIWPacketizerList(); - // PacketizeMIs - Implement this API in the backend to bundle instructions. + // Implement this API in the backend to bundle instructions. void PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, MachineBasicBlock::iterator EndItr); - // getResourceTracker - return ResourceTracker + // Return the ResourceTracker. DFAPacketizer *getResourceTracker() {return ResourceTracker;} // addToPacket - Add MI to the current packet. - virtual MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { - MachineBasicBlock::iterator MII = MI; - CurrentPacketMIs.push_back(MI); + virtual MachineBasicBlock::iterator addToPacket(MachineInstr &MI) { + CurrentPacketMIs.push_back(&MI); ResourceTracker->reserveResources(MI); - return MII; + return MI; } // End the current packet and reset the state of the packetizer. // Overriding this function allows the target-specific packetizer // to perform custom finalization. - virtual void endPacket(MachineBasicBlock *MBB, MachineInstr *MI); + virtual void endPacket(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MI); - // initPacketizerState - perform initialization before packetizing - // an instruction. This function is supposed to be overrided by - // the target dependent packetizer. - virtual void initPacketizerState() { return; } + // Perform initialization before packetizing an instruction. This + // function is supposed to be overrided by the target dependent packetizer. + virtual void initPacketizerState() {} - // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - virtual bool ignorePseudoInstruction(const MachineInstr *I, + // Check if the given instruction I should be ignored by the packetizer. 
+ virtual bool ignorePseudoInstruction(const MachineInstr &I, const MachineBasicBlock *MBB) { return false; } - // isSoloInstruction - return true if instruction MI can not be packetized - // with any other instruction, which means that MI itself is a packet. - virtual bool isSoloInstruction(const MachineInstr *MI) { - return true; - } + // Return true if instruction MI can not be packetized with any other + // instruction, which means that MI itself is a packet. + virtual bool isSoloInstruction(const MachineInstr &MI) { return true; } // Check if the packetizer should try to add the given instruction to // the current packet. One reasons for which it may not be desirable @@ -192,23 +189,22 @@ public: // would cause a stall. // If this function returns "false", the current packet will be ended, // and the instruction will be added to the next packet. - virtual bool shouldAddToPacket(const MachineInstr *MI) { - return true; - } + virtual bool shouldAddToPacket(const MachineInstr &MI) { return true; } - // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ - // together. + // Check if it is legal to packetize SUI and SUJ together. virtual bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { return false; } - // isLegalToPruneDependencies - Is it legal to prune dependece between SUI - // and SUJ. + // Check if it is legal to prune dependece between SUI and SUJ. virtual bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { return false; } + // Add a DAG mutation to be done before the packetization begins. 
+ void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation); }; -} + +} // namespace llvm #endif diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index 72b3adc7de99b..7d6e66fa6ec2a 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -20,7 +20,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/Support/Dwarf.h" -#include <vector> namespace llvm { class AsmPrinter; @@ -29,48 +28,6 @@ class MCSymbol; class raw_ostream; class DwarfTypeUnit; -// AsmStreamerBase - A base abstract interface class defines methods that -// can be implemented to stream objects or can be implemented to -// calculate the size of the streamed objects. -// The derived classes will use an AsmPrinter to implement the methods. -// -// TODO: complete this interface and use it to merge EmitValue and SizeOf -// methods in the DIE classes below. -class AsmStreamerBase { -protected: - const AsmPrinter *AP; - AsmStreamerBase(const AsmPrinter *AP) : AP(AP) {} - -public: - virtual ~AsmStreamerBase() {} - virtual unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr, - unsigned PadTo = 0) = 0; - virtual unsigned emitInt8(unsigned char Value) = 0; - virtual unsigned emitBytes(StringRef Data) = 0; -}; - -/// EmittingAsmStreamer - Implements AbstractAsmStreamer to stream objects. -/// Notice that the return value is not the actual size of the streamed object. -/// For size calculation use SizeReporterAsmStreamer. -class EmittingAsmStreamer : public AsmStreamerBase { -public: - EmittingAsmStreamer(const AsmPrinter *AP) : AsmStreamerBase(AP) {} - unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr, - unsigned PadTo = 0) override; - unsigned emitInt8(unsigned char Value) override; - unsigned emitBytes(StringRef Data) override; -}; - -/// SizeReporterAsmStreamer - Only reports the size of the streamed objects. 
-class SizeReporterAsmStreamer : public AsmStreamerBase { -public: - SizeReporterAsmStreamer(const AsmPrinter *AP) : AsmStreamerBase(AP) {} - unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr, - unsigned PadTo = 0) override; - unsigned emitInt8(unsigned char Value) override; - unsigned emitBytes(StringRef Data) override; -}; - //===--------------------------------------------------------------------===// /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a /// Dwarf abbreviation. @@ -286,25 +243,6 @@ public: }; //===--------------------------------------------------------------------===// -/// \brief A signature reference to a type unit. -class DIETypeSignature { - const DwarfTypeUnit *Unit; - - DIETypeSignature() = delete; - -public: - explicit DIETypeSignature(const DwarfTypeUnit &Unit) : Unit(&Unit) {} - - void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; - unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - assert(Form == dwarf::DW_FORM_ref_sig8); - return 8; - } - - void print(raw_ostream &O) const; -}; - -//===--------------------------------------------------------------------===// /// DIELocList - Represents a pointer to a location list in the debug_loc /// section. // @@ -350,8 +288,9 @@ private: /// All values that aren't standard layout (or are larger than 8 bytes) /// should be stored by reference instead of by value. 
typedef AlignedCharArrayUnion<DIEInteger, DIEString, DIEExpr, DIELabel, - DIEDelta *, DIEEntry, DIETypeSignature, - DIEBlock *, DIELoc *, DIELocList> ValTy; + DIEDelta *, DIEEntry, DIEBlock *, DIELoc *, + DIELocList> + ValTy; static_assert(sizeof(ValTy) <= sizeof(uint64_t) || sizeof(ValTy) <= sizeof(void *), "Expected all large types to be stored via pointer"); @@ -626,7 +565,7 @@ public: typedef iterator_range<value_iterator> value_range; typedef iterator_range<const_value_iterator> const_value_range; - value_iterator addValue(BumpPtrAllocator &Alloc, DIEValue V) { + value_iterator addValue(BumpPtrAllocator &Alloc, const DIEValue &V) { List.push_back(*new (Alloc) Node(V)); return value_iterator(ListTy::toIterator(List.back())); } diff --git a/include/llvm/CodeGen/DIEValue.def b/include/llvm/CodeGen/DIEValue.def index 2cfae7b608c6a..c5ff4010b2e4a 100644 --- a/include/llvm/CodeGen/DIEValue.def +++ b/include/llvm/CodeGen/DIEValue.def @@ -37,7 +37,6 @@ HANDLE_DIEVALUE_SMALL(Expr) HANDLE_DIEVALUE_SMALL(Label) HANDLE_DIEVALUE_LARGE(Delta) HANDLE_DIEVALUE_SMALL(Entry) -HANDLE_DIEVALUE_SMALL(TypeSignature) HANDLE_DIEVALUE_LARGE(Block) HANDLE_DIEVALUE_LARGE(Loc) HANDLE_DIEVALUE_SMALL(LocList) diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index cc4e37059bb86..4bff48de38e42 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -16,7 +16,6 @@ #define LLVM_CODEGEN_FASTISEL_H #include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/IntrinsicInst.h" @@ -24,6 +23,8 @@ namespace llvm { +class MachineConstantPool; + /// \brief This is a fast-path instruction selection class that generates poor /// code and doesn't support illegal types or non-trivial lowering, but runs /// quickly. 
@@ -40,12 +41,15 @@ public: bool IsByVal : 1; bool IsInAlloca : 1; bool IsReturned : 1; + bool IsSwiftSelf : 1; + bool IsSwiftError : 1; uint16_t Alignment; ArgListEntry() : Val(nullptr), Ty(nullptr), IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), IsNest(false), IsByVal(false), - IsInAlloca(false), IsReturned(false), Alignment(0) {} + IsInAlloca(false), IsReturned(false), IsSwiftSelf(false), + IsSwiftError(false), Alignment(0) {} /// \brief Set CallLoweringInfo attribute flags based on a call instruction /// and called function attributes. @@ -448,7 +452,7 @@ protected: /// \brief Emit an unconditional branch to the given block, unless it is the /// immediate (fall-through) successor, and update the CFG. - void fastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL); + void fastEmitBranch(MachineBasicBlock *MBB, const DebugLoc &DL); /// Emit an unconditional branch to \p FalseMBB, obtains the branch weight /// and adds TrueMBB and FalseMBB to the successor list. diff --git a/include/llvm/CodeGen/FaultMaps.h b/include/llvm/CodeGen/FaultMaps.h index f4b6463221432..9b5a3e1ba0500 100644 --- a/include/llvm/CodeGen/FaultMaps.h +++ b/include/llvm/CodeGen/FaultMaps.h @@ -10,7 +10,6 @@ #ifndef LLVM_CODEGEN_FAULTMAPS_H #define LLVM_CODEGEN_FAULTMAPS_H -#include "llvm/ADT/DenseMap.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Format.h" diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index 09a9991912daa..010e34179efc1 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -72,6 +72,37 @@ public: /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap; + typedef SmallVector<unsigned, 1> SwiftErrorVRegs; + typedef SmallVector<const Value*, 1> SwiftErrorValues; + /// A function can only have a single swifterror argument. 
And if it does + /// have a swifterror argument, it must be the first entry in + /// SwiftErrorVals. + SwiftErrorValues SwiftErrorVals; + + /// Track the virtual register for each swifterror value in a given basic + /// block. Entries in SwiftErrorVRegs have the same ordering as entries + /// in SwiftErrorVals. + /// Note that another choice that is more straight-forward is to use + /// Map<const MachineBasicBlock*, Map<Value*, unsigned/*VReg*/>>. It + /// maintains a map from swifterror values to virtual registers for each + /// machine basic block. This choice does not require a one-to-one + /// correspondence between SwiftErrorValues and SwiftErrorVRegs. But because + /// of efficiency concern, we do not choose it. + llvm::DenseMap<const MachineBasicBlock*, SwiftErrorVRegs> SwiftErrorMap; + + /// Track the virtual register for each swifterror value at the end of a basic + /// block when we need the assignment of a virtual register before the basic + /// block is visited. When we actually visit the basic block, we will make + /// sure the swifterror value is in the correct virtual register. + llvm::DenseMap<const MachineBasicBlock*, SwiftErrorVRegs> + SwiftErrorWorklist; + + /// Find the swifterror virtual register in SwiftErrorMap. We will assert + /// failure when the value does not exist in swifterror map. + unsigned findSwiftErrorVReg(const MachineBasicBlock*, const Value*) const; + /// Set the swifterror virtual register in SwiftErrorMap. + void setSwiftErrorVReg(const MachineBasicBlock *MBB, const Value*, unsigned); + /// ValueMap - Since we emit code for the function a basic block at a time, /// we must remember which virtual registers hold the values for /// cross-basic-block values. @@ -80,15 +111,36 @@ public: /// Track virtual registers created for exception pointers. DenseMap<const Value *, unsigned> CatchPadExceptionPointers; - // Keep track of frame indices allocated for statepoints as they could be used - // across basic block boundaries. 
- // Key of the map is statepoint instruction, value is a map from spilled - // llvm Value to the optional stack stack slot index. - // If optional is unspecified it means that we have visited this value - // but didn't spill it. - typedef DenseMap<const Value*, Optional<int>> StatepointSpilledValueMapTy; - DenseMap<const Instruction*, StatepointSpilledValueMapTy> - StatepointRelocatedValues; + /// Keep track of frame indices allocated for statepoints as they could be + /// used across basic block boundaries. This struct is more complex than a + /// simple map because the stateopint lowering code de-duplicates gc pointers + /// based on their SDValue (so %p and (bitcast %p to T) will get the same + /// slot), and we track that here. + + struct StatepointSpillMap { + typedef DenseMap<const Value *, Optional<int>> SlotMapTy; + + /// Maps uniqued llvm IR values to the slots they were spilled in. If a + /// value is mapped to None it means we visited the value but didn't spill + /// it (because it was a constant, for instance). + SlotMapTy SlotMap; + + /// Maps llvm IR values to the values they were de-duplicated to. + DenseMap<const Value *, const Value *> DuplicateMap; + + SlotMapTy::const_iterator find(const Value *V) const { + auto DuplIt = DuplicateMap.find(V); + if (DuplIt != DuplicateMap.end()) + V = DuplIt->second; + return SlotMap.find(V); + } + + SlotMapTy::const_iterator end() const { return SlotMap.end(); } + }; + + /// Maps gc.statepoint instructions to their corresponding StatepointSpillMap + /// instances. + DenseMap<const Instruction *, StatepointSpillMap> StatepointSpillMaps; /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in /// the entry block. 
This allows the allocas to be efficiently referenced @@ -119,7 +171,7 @@ public: struct LiveOutInfo { unsigned NumSignBits : 31; - bool IsValid : 1; + unsigned IsValid : 1; APInt KnownOne, KnownZero; LiveOutInfo() : NumSignBits(0), IsValid(true), KnownOne(1, 0), KnownZero(1, 0) {} diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h index 163117b0781cc..e6afcbc8ded28 100644 --- a/include/llvm/CodeGen/GCMetadata.h +++ b/include/llvm/CodeGen/GCMetadata.h @@ -40,6 +40,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include <memory> +#include <utility> namespace llvm { class AsmPrinter; @@ -54,7 +55,7 @@ struct GCPoint { DebugLoc Loc; GCPoint(GC::PointKind K, MCSymbol *L, DebugLoc DL) - : Kind(K), Label(L), Loc(DL) {} + : Kind(K), Label(L), Loc(std::move(DL)) {} }; /// GCRoot - Metadata for a pointer to an object managed by the garbage @@ -120,7 +121,7 @@ public: /// addSafePoint - Notes the existence of a safe point. Num is the ID of the /// label just prior to the safe point (if the code generator is using /// MachineModuleInfo). - void addSafePoint(GC::PointKind Kind, MCSymbol *Label, DebugLoc DL) { + void addSafePoint(GC::PointKind Kind, MCSymbol *Label, const DebugLoc &DL) { SafePoints.emplace_back(Kind, Label, DL); } diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h new file mode 100644 index 0000000000000..bbd0b6d885938 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -0,0 +1,72 @@ +//===-- llvm/CodeGen/GlobalISel/CallLowering.h - Call lowering --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes how to lower LLVM calls to machine code calls. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H +#define LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" + +namespace llvm { +// Forward declarations. +class MachineIRBuilder; +class TargetLowering; +class Value; + +class CallLowering { + const TargetLowering *TLI; + protected: + /// Getter for generic TargetLowering class. + const TargetLowering *getTLI() const { + return TLI; + } + + /// Getter for target specific TargetLowering class. + template <class XXXTargetLowering> + const XXXTargetLowering *getTLI() const { + return static_cast<const XXXTargetLowering *>(TLI); + } + public: + CallLowering(const TargetLowering *TLI) : TLI(TLI) {} + virtual ~CallLowering() {} + + /// This hook must be implemented to lower outgoing return values, described + /// by \p Val, into the specified virtual register \p VReg. + /// This hook is used by GlobalISel. + /// + /// \return True if the lowering succeeds, false otherwise. + virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, + unsigned VReg) const { + return false; + } + + /// This hook must be implemented to lower the incoming (formal) + /// arguments, described by \p Args, for GlobalISel. Each argument + /// must end up in the related virtual register described by VRegs. + /// In other words, the first argument should end up in VRegs[0], + /// the second in VRegs[1], and so on. + /// \p MIRBuilder is set to the proper insertion for the argument + /// lowering. + /// + /// \return True if the lowering succeeded, false otherwise. + virtual bool + lowerFormalArguments(MachineIRBuilder &MIRBuilder, + const Function::ArgumentListType &Args, + const SmallVectorImpl<unsigned> &VRegs) const { + return false; + } +}; +} // End namespace llvm. 
+ +#endif diff --git a/include/llvm/CodeGen/GlobalISel/GISelAccessor.h b/include/llvm/CodeGen/GlobalISel/GISelAccessor.h new file mode 100644 index 0000000000000..7c5ec9f3adc06 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/GISelAccessor.h @@ -0,0 +1,33 @@ +//===-- GISelAccessor.h - GISel Accessor ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// This file declares the API to access the various APIs related +/// to GlobalISel. +// +//===----------------------------------------------------------------------===/ + +#ifndef LLVM_CODEGEN_GLOBALISEL_GISELACCESSOR_H +#define LLVM_CODEGEN_GLOBALISEL_GISELACCESSOR_H + +namespace llvm { +class CallLowering; +class RegisterBankInfo; + +/// The goal of this helper class is to gather the accessor to all +/// the APIs related to GlobalISel. +/// It should be derived to feature an actual accessor to the GISel APIs. +/// The reason why this is not simply done into the subtarget is to avoid +/// spreading ifdefs around. +struct GISelAccessor { + virtual ~GISelAccessor() {} + virtual const CallLowering *getCallLowering() const { return nullptr;} + virtual const RegisterBankInfo *getRegBankInfo() const { return nullptr;} +}; +} // End namespace llvm; +#endif diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h new file mode 100644 index 0000000000000..833e87493cad1 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -0,0 +1,158 @@ +//===-- llvm/CodeGen/GlobalISel/IRTranslator.h - IRTranslator ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the IRTranslator pass. +/// This pass is responsible for translating LLVM IR into MachineInstr. +/// It uses target hooks to lower the ABI but aside from that, the pass +/// generated code is generic. This is the default translator used for +/// GlobalISel. +/// +/// \todo Replace the comments with actual doxygen comments. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_IRTRANSLATOR_H +#define LLVM_CODEGEN_GLOBALISEL_IRTRANSLATOR_H + +#include "Types.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { +// Forward declarations. +class BasicBlock; +class CallLowering; +class Constant; +class Instruction; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineRegisterInfo; + +// Technically the pass should run on an hypothetical MachineModule, +// since it should translate Global into some sort of MachineGlobal. +// The MachineGlobal should ultimately just be a transfer of ownership of +// the interesting bits that are relevant to represent a global value. +// That being said, we could investigate what would it cost to just duplicate +// the information from the LLVM IR. +// The idea is that ultimately we would be able to free up the memory used +// by the LLVM IR as soon as the translation is over. +class IRTranslator : public MachineFunctionPass { +public: + static char ID; + +private: + /// Interface used to lower the everything related to calls. + const CallLowering *CLI; + /// Mapping of the values of the current LLVM IR function + /// to the related virtual registers. 
+ ValueToVReg ValToVReg; + // Constants are special because when we encounter one, + // we do not know at first where to insert the definition since + // this depends on all its uses. + // Thus, we will insert the sequences to materialize them when + // we know all their users. + // In the meantime, just keep it in a set. + // Note: Constants that end up as immediate in the related instructions, + // do not appear in that map. + SmallSetVector<const Constant *, 8> Constants; + + DenseMap<const BasicBlock *, MachineBasicBlock *> BBToMBB; + + /// Methods for translating form LLVM IR to MachineInstr. + /// \see ::translate for general information on the translate methods. + /// @{ + + /// Translate \p Inst into its corresponding MachineInstr instruction(s). + /// Insert the newly translated instruction(s) right where the MIRBuilder + /// is set. + /// + /// The general algorithm is: + /// 1. Look for a virtual register for each operand or + /// create one. + /// 2 Update the ValToVReg accordingly. + /// 2.alt. For constant arguments, if they are compile time constants, + /// produce an immediate in the right operand and do not touch + /// ValToReg. Actually we will go with a virtual register for each + /// constants because it may be expensive to actually materialize the + /// constant. Moreover, if the constant spans on several instructions, + /// CSE may not catch them. + /// => Update ValToVReg and remember that we saw a constant in Constants. + /// We will materialize all the constants in finalize. + /// Note: we would need to do something so that we can recognize such operand + /// as constants. + /// 3. Create the generic instruction. + /// + /// \return true if the translation succeeded. + bool translate(const Instruction &Inst); + + /// Translate \p Inst into a binary operation \p Opcode. + /// \pre \p Inst is a binary operation. + bool translateBinaryOp(unsigned Opcode, const Instruction &Inst); + + /// Translate branch (br) instruction. 
+ /// \pre \p Inst is a branch instruction. + bool translateBr(const Instruction &Inst); + + /// Translate return (ret) instruction. + /// The target needs to implement CallLowering::lowerReturn for + /// this to succeed. + /// \pre \p Inst is a return instruction. + bool translateReturn(const Instruction &Inst); + /// @} + + // Builder for machine instruction a la IRBuilder. + // I.e., compared to regular MIBuilder, this one also inserts the instruction + // in the current block, it can creates block, etc., basically a kind of + // IRBuilder, but for Machine IR. + MachineIRBuilder MIRBuilder; + + /// MachineRegisterInfo used to create virtual registers. + MachineRegisterInfo *MRI; + + // * Insert all the code needed to materialize the constants + // at the proper place. E.g., Entry block or dominator block + // of each constant depending on how fancy we want to be. + // * Clear the different maps. + void finalize(); + + /// Get the VReg that represents \p Val. + /// If such VReg does not exist, it is created. + unsigned getOrCreateVReg(const Value &Val); + + /// Get the MachineBasicBlock that represents \p BB. + /// If such basic block does not exist, it is created. + MachineBasicBlock &getOrCreateBB(const BasicBlock &BB); + +public: + // Ctor, nothing fancy. + IRTranslator(); + + const char *getPassName() const override { + return "IRTranslator"; + } + + // Algo: + // CallLowering = MF.subtarget.getCallLowering() + // F = MF.getParent() + // MIRBuilder.reset(MF) + // MIRBuilder.getOrCreateBB(F.getEntryBB()) + // CallLowering->translateArguments(MIRBuilder, F, ValToVReg) + // for each bb in F + // MIRBuilder.getOrCreateBB(bb) + // for each inst in bb + // if (!translate(MIRBuilder, inst, ValToVReg, ConstantToSequence)) + // report_fatal_error(“Don’t know how to translate input"); + // finalize() + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // End namespace llvm. 
+#endif diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h new file mode 100644 index 0000000000000..efdc59a9cddf5 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -0,0 +1,146 @@ +//===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.h - MIBuilder --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the MachineIRBuilder class. +/// This is a helper class to build MachineInstr. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H +#define LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H + +#include "llvm/CodeGen/GlobalISel/Types.h" + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/DebugLoc.h" + +namespace llvm { + +// Forward declarations. +class MachineFunction; +class MachineInstr; +class TargetInstrInfo; + +/// Helper class to build MachineInstr. +/// It keeps internally the insertion point and debug location for all +/// the new instructions we want to create. +/// This information can be modify via the related setters. +class MachineIRBuilder { + /// MachineFunction under construction. + MachineFunction *MF; + /// Information used to access the description of the opcodes. + const TargetInstrInfo *TII; + /// Debug location to be set to any instruction we create. + DebugLoc DL; + + /// Fields describing the insertion point. + /// @{ + MachineBasicBlock *MBB; + MachineInstr *MI; + bool Before; + /// @} + + const TargetInstrInfo &getTII() { + assert(TII && "TargetInstrInfo is not set"); + return *TII; + } + +public: + /// Getter for the function we currently build. 
+ MachineFunction &getMF() { + assert(MF && "MachineFunction is not set"); + return *MF; + } + + /// Getter for the basic block we currently build. + MachineBasicBlock &getMBB() { + assert(MBB && "MachineBasicBlock is not set"); + return *MBB; + } + + /// Current insertion point for new instructions. + MachineBasicBlock::iterator getInsertPt(); + + /// Setters for the insertion point. + /// @{ + /// Set the MachineFunction where to build instructions. + void setMF(MachineFunction &); + + /// Set the insertion point to the beginning (\p Beginning = true) or end + /// (\p Beginning = false) of \p MBB. + /// \pre \p MBB must be contained by getMF(). + void setMBB(MachineBasicBlock &MBB, bool Beginning = false); + + /// Set the insertion point to before (\p Before = true) or after + /// (\p Before = false) \p MI. + /// \pre MI must be in getMF(). + void setInstr(MachineInstr &MI, bool Before = false); + /// @} + + /// Set the debug location to \p DL for all the next build instructions. + void setDebugLoc(const DebugLoc &DL) { this->DL = DL; } + + /// Build and insert <empty> = \p Opcode [\p Ty] <empty>. + /// \p Ty is the type of the instruction if \p Opcode describes + /// a generic machine instruction. \p Ty must be nullptr if \p Opcode + /// does not describe a generic instruction. + /// The insertion point is the one set by the last call of either + /// setBasicBlock or setMI. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre Ty == nullptr or isPreISelGenericOpcode(Opcode) + /// + /// \return The newly created instruction. + MachineInstr *buildInstr(unsigned Opcode, Type *Ty); + + /// Build and insert <empty> = \p Opcode [\p Ty] \p BB. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre Ty == nullptr or isPreISelGenericOpcode(Opcode) + /// + /// \return The newly created instruction. 
+ MachineInstr *buildInstr(unsigned Opcode, Type *Ty, MachineBasicBlock &BB); + + /// Build and insert \p Res<def> = \p Opcode [\p Ty] \p Op0, \p Op1. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre Ty == nullptr or isPreISelGenericOpcode(Opcode) + /// + /// \return The newly created instruction. + MachineInstr *buildInstr(unsigned Opcode, Type *Ty, unsigned Res, + unsigned Op0, unsigned Op1); + + /// Build and insert \p Res<def> = \p Opcode \p Op0, \p Op1. + /// I.e., instruction with a non-generic opcode. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre not isPreISelGenericOpcode(\p Opcode) + /// + /// \return The newly created instruction. + MachineInstr *buildInstr(unsigned Opcode, unsigned Res, unsigned Op0, + unsigned Op1); + + /// Build and insert \p Res<def> = \p Opcode \p Op0. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre not isPreISelGenericOpcode(\p Opcode) + /// + /// \return The newly created instruction. + MachineInstr *buildInstr(unsigned Opcode, unsigned Res, unsigned Op0); + + /// Build and insert <empty> = \p Opcode <empty>. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre not isPreISelGenericOpcode(\p Opcode) + /// + /// \return The newly created instruction. + MachineInstr *buildInstr(unsigned Opcode); +}; + +} // End namespace llvm. +#endif // LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H diff --git a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h new file mode 100644 index 0000000000000..b393744e67cb6 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -0,0 +1,614 @@ +//== llvm/CodeGen/GlobalISel/RegBankSelect.h - Reg Bank Selector -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file This file describes the interface of the MachineFunctionPass +/// responsible for assigning the generic virtual registers to register bank. + +/// By default, the reg bank selector relies on local decisions to +/// assign the register bank. In other words, it looks at one instruction +/// at a time to decide where the operand of that instruction should live. +/// +/// At higher optimization level, we could imagine that the reg bank selector +/// would use more global analysis and do crazier thing like duplicating +/// instructions and so on. This is future work. +/// +/// For now, the pass uses a greedy algorithm to decide where the operand +/// of an instruction should live. It asks the target which banks may be +/// used for each operand of the instruction and what is the cost. Then, +/// it chooses the solution which minimize the cost of the instruction plus +/// the cost of any move that may be needed to to the values into the right +/// register bank. +/// In other words, the cost for an instruction on a register bank RegBank +/// is: Cost of I on RegBank plus the sum of the cost for bringing the +/// input operands from their current register bank to RegBank. +/// Thus, the following formula: +/// cost(I, RegBank) = cost(I.Opcode, RegBank) + +/// sum(for each arg in I.arguments: costCrossCopy(arg.RegBank, RegBank)) +/// +/// E.g., Let say we are assigning the register bank for the instruction +/// defining v2. +/// v0(A_REGBANK) = ... +/// v1(A_REGBANK) = ... +/// v2 = G_ADD i32 v0, v1 <-- MI +/// +/// The target may say it can generate G_ADD i32 on register bank A and B +/// with a cost of respectively 5 and 1. +/// Then, let say the cost of a cross register bank copies from A to B is 1. 
+/// The reg bank selector would compare the following two costs: +/// cost(MI, A_REGBANK) = cost(G_ADD, A_REGBANK) + cost(v0.RegBank, A_REGBANK) + +/// cost(v1.RegBank, A_REGBANK) +/// = 5 + cost(A_REGBANK, A_REGBANK) + cost(A_REGBANK, +/// A_REGBANK) +/// = 5 + 0 + 0 = 5 +/// cost(MI, B_REGBANK) = cost(G_ADD, B_REGBANK) + cost(v0.RegBank, B_REGBANK) + +/// cost(v1.RegBank, B_REGBANK) +/// = 1 + cost(A_REGBANK, B_REGBANK) + cost(A_REGBANK, +/// B_REGBANK) +/// = 1 + 1 + 1 = 3 +/// Therefore, in this specific example, the reg bank selector would choose +/// bank B for MI. +/// v0(A_REGBANK) = ... +/// v1(A_REGBANK) = ... +/// tmp0(B_REGBANK) = COPY v0 +/// tmp1(B_REGBANK) = COPY v1 +/// v2(B_REGBANK) = G_ADD i32 tmp0, tmp1 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H +#define LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H + +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { +// Forward declarations. +class BlockFrequency; +class MachineBranchProbabilityInfo; +class MachineBlockFrequencyInfo; +class MachineRegisterInfo; +class TargetRegisterInfo; + +/// This pass implements the reg bank selector pass used in the GlobalISel +/// pipeline. At the end of this pass, all register operands have been assigned +class RegBankSelect : public MachineFunctionPass { +public: + static char ID; + + /// List of the modes supported by the RegBankSelect pass. + enum Mode { + /// Assign the register banks as fast as possible (default). + Fast, + /// Greedily minimize the cost of assigning register banks. + /// This should produce code of greater quality, but will + /// require more compile time. + Greedy + }; + + /// Abstract class used to represent an insertion point in a CFG. + /// This class records an insertion point and materializes it on + /// demand. 
+ /// It allows to reason about the frequency of this insertion point, + /// without having to logically materialize it (e.g., on an edge), + /// before we actually need to insert something. + class InsertPoint { + protected: + /// Tell if the insert point has already been materialized. + bool WasMaterialized = false; + /// Materialize the insertion point. + /// + /// If isSplit() is true, this involves actually splitting + /// the block or edge. + /// + /// \post getPointImpl() returns a valid iterator. + /// \post getInsertMBBImpl() returns a valid basic block. + /// \post isSplit() == false ; no more splitting should be required. + virtual void materialize() = 0; + + /// Return the materialized insertion basic block. + /// Code will be inserted into that basic block. + /// + /// \pre ::materialize has been called. + virtual MachineBasicBlock &getInsertMBBImpl() = 0; + + /// Return the materialized insertion point. + /// Code will be inserted before that point. + /// + /// \pre ::materialize has been called. + virtual MachineBasicBlock::iterator getPointImpl() = 0; + + public: + virtual ~InsertPoint() {} + + /// The first call to this method will cause the splitting to + /// happen if need be, then sub sequent calls just return + /// the iterator to that point. I.e., no more splitting will + /// occur. + /// + /// \return The iterator that should be used with + /// MachineBasicBlock::insert. I.e., additional code happens + /// before that point. + MachineBasicBlock::iterator getPoint() { + if (!WasMaterialized) { + WasMaterialized = true; + assert(canMaterialize() && "Impossible to materialize this point"); + materialize(); + } + // When we materialized the point we should have done the splitting. + assert(!isSplit() && "Wrong pre-condition"); + return getPointImpl(); + } + + /// The first call to this method will cause the splitting to + /// happen if need be, then sub sequent calls just return + /// the basic block that contains the insertion point. 
+ /// I.e., no more splitting will occur. + /// + /// \return The basic block should be used with + /// MachineBasicBlock::insert and ::getPoint. The new code should + /// happen before that point. + MachineBasicBlock &getInsertMBB() { + if (!WasMaterialized) { + WasMaterialized = true; + assert(canMaterialize() && "Impossible to materialize this point"); + materialize(); + } + // When we materialized the point we should have done the splitting. + assert(!isSplit() && "Wrong pre-condition"); + return getInsertMBBImpl(); + } + + /// Insert \p MI in the just before ::getPoint() + MachineBasicBlock::iterator insert(MachineInstr &MI) { + return getInsertMBB().insert(getPoint(), &MI); + } + + /// Does this point involve splitting an edge or block? + /// As soon as ::getPoint is called and thus, the point + /// materialized, the point will not require splitting anymore, + /// i.e., this will return false. + virtual bool isSplit() const { return false; } + + /// Frequency of the insertion point. + /// \p P is used to access the various analysis that will help to + /// get that information, like MachineBlockFrequencyInfo. If \p P + /// does not contain enough enough to return the actual frequency, + /// this returns 1. + virtual uint64_t frequency(const Pass &P) const { return 1; } + + /// Check whether this insertion point can be materialized. + /// As soon as ::getPoint is called and thus, the point materialized + /// calling this method does not make sense. + virtual bool canMaterialize() const { return false; } + }; + + /// Insertion point before or after an instruction. + class InstrInsertPoint : public InsertPoint { + private: + /// Insertion point. + MachineInstr &Instr; + /// Does the insertion point is before or after Instr. + bool Before; + + void materialize() override; + + MachineBasicBlock::iterator getPointImpl() override { + if (Before) + return Instr; + return Instr.getNextNode() ? 
*Instr.getNextNode() + : Instr.getParent()->end(); + } + + MachineBasicBlock &getInsertMBBImpl() override { + return *Instr.getParent(); + } + + public: + /// Create an insertion point before (\p Before=true) or after \p Instr. + InstrInsertPoint(MachineInstr &Instr, bool Before = true); + bool isSplit() const override; + uint64_t frequency(const Pass &P) const override; + + // Worst case, we need to slice the basic block, but that is still doable. + bool canMaterialize() const override { return true; } + }; + + /// Insertion point at the beginning or end of a basic block. + class MBBInsertPoint : public InsertPoint { + private: + /// Insertion point. + MachineBasicBlock &MBB; + /// Does the insertion point is at the beginning or end of MBB. + bool Beginning; + + void materialize() override { /*Nothing to do to materialize*/ + } + + MachineBasicBlock::iterator getPointImpl() override { + return Beginning ? MBB.begin() : MBB.end(); + } + + MachineBasicBlock &getInsertMBBImpl() override { return MBB; } + + public: + MBBInsertPoint(MachineBasicBlock &MBB, bool Beginning = true) + : InsertPoint(), MBB(MBB), Beginning(Beginning) { + // If we try to insert before phis, we should use the insertion + // points on the incoming edges. + assert((!Beginning || MBB.getFirstNonPHI() == MBB.begin()) && + "Invalid beginning point"); + // If we try to insert after the terminators, we should use the + // points on the outcoming edges. + assert((Beginning || MBB.getFirstTerminator() == MBB.end()) && + "Invalid end point"); + } + bool isSplit() const override { return false; } + uint64_t frequency(const Pass &P) const override; + bool canMaterialize() const override { return true; }; + }; + + /// Insertion point on an edge. + class EdgeInsertPoint : public InsertPoint { + private: + /// Source of the edge. + MachineBasicBlock &Src; + /// Destination of the edge. + /// After the materialization is done, this hold the basic block + /// that resulted from the splitting. 
+ MachineBasicBlock *DstOrSplit; + /// P is used to update the analysis passes as applicable. + Pass &P; + + void materialize() override; + + MachineBasicBlock::iterator getPointImpl() override { + // DstOrSplit should be the Split block at this point. + // I.e., it should have one predecessor, Src, and one successor, + // the original Dst. + assert(DstOrSplit && DstOrSplit->isPredecessor(&Src) && + DstOrSplit->pred_size() == 1 && DstOrSplit->succ_size() == 1 && + "Did not split?!"); + return DstOrSplit->begin(); + } + + MachineBasicBlock &getInsertMBBImpl() override { return *DstOrSplit; } + + public: + EdgeInsertPoint(MachineBasicBlock &Src, MachineBasicBlock &Dst, Pass &P) + : InsertPoint(), Src(Src), DstOrSplit(&Dst), P(P) {} + bool isSplit() const override { + return Src.succ_size() > 1 && DstOrSplit->pred_size() > 1; + } + uint64_t frequency(const Pass &P) const override; + bool canMaterialize() const override; + }; + + /// Struct used to represent the placement of a repairing point for + /// a given operand. + class RepairingPlacement { + public: + /// Define the kind of action this repairing needs. + enum RepairingKind { + /// Nothing to repair, just drop this action. + None, + /// Reparing code needs to happen before InsertPoints. + Insert, + /// (Re)assign the register bank of the operand. + Reassign, + /// Mark this repairing placement as impossible. + Impossible + }; + + /// Convenient types for a list of insertion points. + /// @{ + typedef SmallVector<std::unique_ptr<InsertPoint>, 2> InsertionPoints; + typedef InsertionPoints::iterator insertpt_iterator; + typedef InsertionPoints::const_iterator const_insertpt_iterator; + /// @} + + private: + /// Kind of repairing. + RepairingKind Kind; + /// Index of the operand that will be repaired. + unsigned OpIdx; + /// Are all the insert points materializeable? + bool CanMaterialize; + /// Is there any of the insert points needing splitting? + bool HasSplit; + /// Insertion point for the repair code. 
+ /// The repairing code needs to happen just before these points. + InsertionPoints InsertPoints; + /// Some insertion points may need to update the liveness and such. + Pass &P; + + public: + /// Create a repairing placement for the \p OpIdx-th operand of + /// \p MI. \p TRI is used to make some checks on the register aliases + /// if the machine operand is a physical register. \p P is used to + /// to update liveness information and such when materializing the + /// points. + RepairingPlacement(MachineInstr &MI, unsigned OpIdx, + const TargetRegisterInfo &TRI, Pass &P, + RepairingKind Kind = RepairingKind::Insert); + + /// Getters. + /// @{ + RepairingKind getKind() const { return Kind; } + unsigned getOpIdx() const { return OpIdx; } + bool canMaterialize() const { return CanMaterialize; } + bool hasSplit() { return HasSplit; } + /// @} + + /// Overloaded methods to add an insertion point. + /// @{ + /// Add a MBBInsertionPoint to the list of InsertPoints. + void addInsertPoint(MachineBasicBlock &MBB, bool Beginning); + /// Add a InstrInsertionPoint to the list of InsertPoints. + void addInsertPoint(MachineInstr &MI, bool Before); + /// Add an EdgeInsertionPoint (\p Src, \p Dst) to the list of InsertPoints. + void addInsertPoint(MachineBasicBlock &Src, MachineBasicBlock &Dst); + /// Add an InsertPoint to the list of insert points. + /// This method takes the ownership of &\p Point. + void addInsertPoint(InsertPoint &Point); + /// @} + + /// Accessors related to the insertion points. + /// @{ + insertpt_iterator begin() { return InsertPoints.begin(); } + insertpt_iterator end() { return InsertPoints.end(); } + + const_insertpt_iterator begin() const { return InsertPoints.begin(); } + const_insertpt_iterator end() const { return InsertPoints.end(); } + + unsigned getNumInsertPoints() const { return InsertPoints.size(); } + /// @} + + /// Change the type of this repairing placement to \p NewKind. 
+ /// It is not possible to switch a repairing placement to the + /// RepairingKind::Insert. There is no fundamental problem with + /// that, but no uses as well, so do not support it for now. + /// + /// \pre NewKind != RepairingKind::Insert + /// \post getKind() == NewKind + void switchTo(RepairingKind NewKind) { + assert(NewKind != Kind && "Already of the right Kind"); + Kind = NewKind; + InsertPoints.clear(); + CanMaterialize = NewKind != RepairingKind::Impossible; + HasSplit = false; + assert(NewKind != RepairingKind::Insert && + "We would need more MI to switch to Insert"); + } + }; + +private: + /// Helper class used to represent the cost for mapping an instruction. + /// When mapping an instruction, we may introduce some repairing code. + /// In most cases, the repairing code is local to the instruction, + /// thus, we can omit the basic block frequency from the cost. + /// However, some alternatives may produce non-local cost, e.g., when + /// repairing a phi, and thus we then need to scale the local cost + /// to the non-local cost. This class does this for us. + /// \note: We could simply always scale the cost. The problem is that + /// there are higher chances that we saturate the cost easier and end + /// up having the same cost for actually different alternatives. + /// Another option would be to use APInt everywhere. + class MappingCost { + private: + /// Cost of the local instructions. + /// This cost is free of basic block frequency. + uint64_t LocalCost; + /// Cost of the non-local instructions. + /// This cost should include the frequency of the related blocks. + uint64_t NonLocalCost; + /// Frequency of the block where the local instructions live. + uint64_t LocalFreq; + + MappingCost(uint64_t LocalCost, uint64_t NonLocalCost, uint64_t LocalFreq) + : LocalCost(LocalCost), NonLocalCost(NonLocalCost), + LocalFreq(LocalFreq) {} + + /// Check if this cost is saturated. 
+ bool isSaturated() const; + + public: + /// Create a MappingCost assuming that most of the instructions + /// will occur in a basic block with \p LocalFreq frequency. + MappingCost(const BlockFrequency &LocalFreq); + + /// Add \p Cost to the local cost. + /// \return true if this cost is saturated, false otherwise. + bool addLocalCost(uint64_t Cost); + + /// Add \p Cost to the non-local cost. + /// Non-local cost should reflect the frequency of their placement. + /// \return true if this cost is saturated, false otherwise. + bool addNonLocalCost(uint64_t Cost); + + /// Saturate the cost to the maximal representable value. + void saturate(); + + /// Return an instance of MappingCost that represents an + /// impossible mapping. + static MappingCost ImpossibleCost(); + + /// Check if this is less than \p Cost. + bool operator<(const MappingCost &Cost) const; + /// Check if this is equal to \p Cost. + bool operator==(const MappingCost &Cost) const; + /// Check if this is not equal to \p Cost. + bool operator!=(const MappingCost &Cost) const { return !(*this == Cost); } + /// Check if this is greater than \p Cost. + bool operator>(const MappingCost &Cost) const { + return *this != Cost && Cost < *this; + } + }; + + /// Interface to the target lowering info related + /// to register banks. + const RegisterBankInfo *RBI; + + /// MRI contains all the register class/bank information that this + /// pass uses and updates. + MachineRegisterInfo *MRI; + + /// Information on the register classes for the current function. + const TargetRegisterInfo *TRI; + + /// Get the frequency of blocks. + /// This is required for non-fast mode. + MachineBlockFrequencyInfo *MBFI; + + /// Get the frequency of the edges. + /// This is required for non-fast mode. + MachineBranchProbabilityInfo *MBPI; + + /// Helper class used for every code morphing. + MachineIRBuilder MIRBuilder; + + /// Optimization mode of the pass. + Mode OptMode; + + /// Assign the register bank of each operand of \p MI. 
+ void assignInstr(MachineInstr &MI); + + /// Initialize the field members using \p MF. + void init(MachineFunction &MF); + + /// Check if \p Reg is already assigned what is described by \p ValMapping. + /// \p OnlyAssign == true means that \p Reg just needs to be assigned a + /// register bank. I.e., no repairing is necessary to have the + /// assignment match. + bool assignmentMatch(unsigned Reg, + const RegisterBankInfo::ValueMapping &ValMapping, + bool &OnlyAssign) const; + + /// Insert repairing code for \p Reg as specified by \p ValMapping. + /// The repairing placement is specified by \p RepairPt. + /// \p NewVRegs contains all the registers required to remap \p Reg. + /// In other words, the number of registers in NewVRegs must be equal + /// to ValMapping.BreakDown.size(). + /// + /// The transformation could be sketched as: + /// \code + /// ... = op Reg + /// \endcode + /// Becomes + /// \code + /// <NewRegs> = COPY or extract Reg + /// ... = op Reg + /// \endcode + /// + /// and + /// \code + /// Reg = op ... + /// \endcode + /// Becomes + /// \code + /// Reg = op ... + /// Reg = COPY or build_sequence <NewRegs> + /// \endcode + /// + /// \pre NewVRegs.size() == ValMapping.BreakDown.size() + /// + /// \note The caller is supposed to do the rewriting of op if need be. + /// I.e., Reg = op ... => <NewRegs> = NewOp ... + void repairReg(MachineOperand &MO, + const RegisterBankInfo::ValueMapping &ValMapping, + RegBankSelect::RepairingPlacement &RepairPt, + const iterator_range<SmallVectorImpl<unsigned>::const_iterator> + &NewVRegs); + + /// Return the cost of the instruction needed to map \p MO to \p ValMapping. + /// The cost is free of basic block frequencies. + /// \pre MO.isReg() + /// \pre MO is assigned to a register bank. + /// \pre ValMapping is a valid mapping for MO. + uint64_t + getRepairCost(const MachineOperand &MO, + const RegisterBankInfo::ValueMapping &ValMapping) const; + + /// Find the best mapping for \p MI from \p PossibleMappings. 
+ /// \return a reference on the best mapping in \p PossibleMappings. + RegisterBankInfo::InstructionMapping & + findBestMapping(MachineInstr &MI, + RegisterBankInfo::InstructionMappings &PossibleMappings, + SmallVectorImpl<RepairingPlacement> &RepairPts); + + /// Compute the cost of mapping \p MI with \p InstrMapping and + /// compute the repairing placement for such mapping in \p + /// RepairPts. + /// \p BestCost is used to specify when the cost becomes too high + /// and thus it is not worth computing the RepairPts. Moreover if + /// \p BestCost == nullptr, the mapping cost is actually not + /// computed. + MappingCost + computeMapping(MachineInstr &MI, + const RegisterBankInfo::InstructionMapping &InstrMapping, + SmallVectorImpl<RepairingPlacement> &RepairPts, + const MappingCost *BestCost = nullptr); + + /// When \p RepairPt involves splitting to repair \p MO for the + /// given \p ValMapping, try to change the way we repair such that + /// the splitting is not required anymore. + /// + /// \pre \p RepairPt.hasSplit() + /// \pre \p MO == MO.getParent()->getOperand(\p RepairPt.getOpIdx()) + /// \pre \p ValMapping is the mapping of \p MO for MO.getParent() + /// that implied \p RepairPt. + void tryAvoidingSplit(RegBankSelect::RepairingPlacement &RepairPt, + const MachineOperand &MO, + const RegisterBankInfo::ValueMapping &ValMapping) const; + + /// Apply \p Mapping to \p MI. \p RepairPts represents the different + /// mapping action that need to happen for the mapping to be + /// applied. + void applyMapping(MachineInstr &MI, + const RegisterBankInfo::InstructionMapping &InstrMapping, + SmallVectorImpl<RepairingPlacement> &RepairPts); + +public: + /// Create a RegBankSelect pass with the specified \p RunningMode. 
+ RegBankSelect(Mode RunningMode = Fast); + + const char *getPassName() const override { + return "RegBankSelect"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Walk through \p MF and assign a register bank to every virtual register + /// that are still mapped to nothing. + /// The target needs to provide a RegisterBankInfo and in particular + /// override RegisterBankInfo::getInstrMapping. + /// + /// Simplified algo: + /// \code + /// RBI = MF.subtarget.getRegBankInfo() + /// MIRBuilder.setMF(MF) + /// for each bb in MF + /// for each inst in bb + /// MIRBuilder.setInstr(inst) + /// MappingCosts = RBI.getMapping(inst); + /// Idx = findIdxOfMinCost(MappingCosts) + /// CurRegBank = MappingCosts[Idx].RegBank + /// MRI.setRegBank(inst.getOperand(0).getReg(), CurRegBank) + /// for each argument in inst + /// if (CurRegBank != argument.RegBank) + /// ArgReg = argument.getReg() + /// Tmp = MRI.createNewVirtual(MRI.getSize(ArgReg), CurRegBank) + /// MIRBuilder.buildInstr(COPY, Tmp, ArgReg) + /// inst.getOperand(argument.getOperandNo()).setReg(Tmp) + /// \endcode + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End namespace llvm. + +#endif diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBank.h b/include/llvm/CodeGen/GlobalISel/RegisterBank.h new file mode 100644 index 0000000000000..e886382fd8e83 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/RegisterBank.h @@ -0,0 +1,101 @@ +//==-- llvm/CodeGen/GlobalISel/RegisterBank.h - Register Bank ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file declares the API of register banks. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_REGBANK_H +#define LLVM_CODEGEN_GLOBALISEL_REGBANK_H + +#include "llvm/ADT/BitVector.h" + +namespace llvm { +// Forward declarations. +class RegisterBankInfo; +class raw_ostream; +class TargetRegisterClass; +class TargetRegisterInfo; + +/// This class implements the register bank concept. +/// Two instances of RegisterBank must have different ID. +/// This property is enforced by the RegisterBankInfo class. +class RegisterBank { +private: + unsigned ID; + const char *Name; + unsigned Size; + BitVector ContainedRegClasses; + + /// Sentinel value used to recognize register bank not properly + /// initialized yet. + static const unsigned InvalidID; + + /// Only the RegisterBankInfo can create RegisterBank. + /// The default constructor will leave the object in + /// an invalid state. I.e. isValid() == false. + /// The field must be updated to fix that. + RegisterBank(); + + friend RegisterBankInfo; + +public: + /// Get the identifier of this register bank. + unsigned getID() const { return ID; } + + /// Get a user friendly name of this register bank. + /// Should be used only for debugging purposes. + const char *getName() const { return Name; } + + /// Get the maximal size in bits that fits in this register bank. + unsigned getSize() const { return Size; } + + /// Check whether this instance is ready to be used. + bool isValid() const; + + /// Check if this register bank is valid. In other words, + /// if it has been properly constructed. + /// + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True is the check was successful. + bool verify(const TargetRegisterInfo &TRI) const; + + /// Check whether this register bank covers \p RC. + /// In other words, check if this register bank fully covers + /// the registers that \p RC contains. 
+ /// \pre isValid() + bool covers(const TargetRegisterClass &RC) const; + + /// Check whether \p OtherRB is the same as this. + bool operator==(const RegisterBank &OtherRB) const; + bool operator!=(const RegisterBank &OtherRB) const { + return !this->operator==(OtherRB); + } + + /// Dump the register mask on dbgs() stream. + /// The dump is verbose. + void dump(const TargetRegisterInfo *TRI = nullptr) const; + + /// Print the register mask on OS. + /// If IsForDebug is false, then only the name of the register bank + /// is printed. Otherwise, all the fields are printing. + /// TRI is then used to print the name of the register classes that + /// this register bank covers. + void print(raw_ostream &OS, bool IsForDebug = false, + const TargetRegisterInfo *TRI = nullptr) const; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const RegisterBank &RegBank) { + RegBank.print(OS); + return OS; +} +} // End namespace llvm. + +#endif diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h new file mode 100644 index 0000000000000..19d170365858b --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -0,0 +1,602 @@ +//==-- llvm/CodeGen/GlobalISel/RegisterBankInfo.h ----------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file declares the API for the register bank info. +/// This API is responsible for handling the register banks. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_REGBANKINFO_H +#define LLVM_CODEGEN_GLOBALISEL_REGBANKINFO_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/MachineValueType.h" // For SimpleValueType. +#include "llvm/Support/ErrorHandling.h" + +#include <cassert> +#include <memory> // For unique_ptr. + +namespace llvm { +class MachineInstr; +class MachineRegisterInfo; +class TargetInstrInfo; +class TargetRegisterInfo; +class raw_ostream; + +/// Holds all the information related to register banks. +class RegisterBankInfo { +public: + /// Helper struct that represents how a value is partially mapped + /// into a register. + /// The StartIdx and Length represent what region of the orginal + /// value this partial mapping covers. + /// This can be represented as a Mask of contiguous bit starting + /// at StartIdx bit and spanning Length bits. + /// StartIdx is the number of bits from the less significant bits. + struct PartialMapping { + /// Number of bits at which this partial mapping starts in the + /// original value. The bits are counted from less significant + /// bits to most significant bits. + unsigned StartIdx; + /// Length of this mapping in bits. This is how many bits this + /// partial mapping covers in the original value: + /// from StartIdx to StartIdx + Length -1. + unsigned Length; + /// Register bank where the partial value lives. + const RegisterBank *RegBank; + + PartialMapping() = default; + + /// Provide a shortcut for quickly building PartialMapping. + PartialMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) + : StartIdx(StartIdx), Length(Length), RegBank(&RegBank) {} + + /// \return the index of in the original value of the most + /// significant bit that this partial mapping covers. 
+ unsigned getHighBitIdx() const { return StartIdx + Length - 1; } + + /// Print this partial mapping on dbgs() stream. + void dump() const; + + /// Print this partial mapping on \p OS; + void print(raw_ostream &OS) const; + + /// Check that the Mask is compatible with the RegBank. + /// Indeed, if the RegBank cannot accomadate the "active bits" of the mask, + /// there is no way this mapping is valid. + /// + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True is the check was successful. + bool verify() const; + }; + + /// Helper struct that represents how a value is mapped through + /// different register banks. + struct ValueMapping { + /// How the value is broken down between the different register banks. + SmallVector<PartialMapping, 2> BreakDown; + + /// Verify that this mapping makes sense for a value of \p ExpectedBitWidth. + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True is the check was successful. + bool verify(unsigned ExpectedBitWidth) const; + + /// Print this on dbgs() stream. + void dump() const; + + /// Print this on \p OS; + void print(raw_ostream &OS) const; + }; + + /// Helper class that represents how the value of an instruction may be + /// mapped and what is the related cost of such mapping. + class InstructionMapping { + /// Identifier of the mapping. + /// This is used to communicate between the target and the optimizers + /// which mapping should be realized. + unsigned ID; + /// Cost of this mapping. + unsigned Cost; + /// Mapping of all the operands. + std::unique_ptr<ValueMapping[]> OperandsMapping; + /// Number of operands. + unsigned NumOperands; + + ValueMapping &getOperandMapping(unsigned i) { + assert(i < getNumOperands() && "Out of bound operand"); + return OperandsMapping[i]; + } + + public: + /// Constructor for the mapping of an instruction. 
+ /// \p NumOperands must be equal to number of all the operands of + /// the related instruction. + /// The rationale is that it is more efficient for the optimizers + /// to be able to assume that the mapping of the ith operand is + /// at the index i. + /// + /// \pre ID != InvalidMappingID + InstructionMapping(unsigned ID, unsigned Cost, unsigned NumOperands) + : ID(ID), Cost(Cost), NumOperands(NumOperands) { + assert(getID() != InvalidMappingID && + "Use the default constructor for invalid mapping"); + OperandsMapping.reset(new ValueMapping[getNumOperands()]); + } + + /// Default constructor. + /// Use this constructor to express that the mapping is invalid. + InstructionMapping() : ID(InvalidMappingID), Cost(0), NumOperands(0) {} + + /// Get the cost. + unsigned getCost() const { return Cost; } + + /// Get the ID. + unsigned getID() const { return ID; } + + /// Get the number of operands. + unsigned getNumOperands() const { return NumOperands; } + + /// Get the value mapping of the ith operand. + const ValueMapping &getOperandMapping(unsigned i) const { + return const_cast<InstructionMapping *>(this)->getOperandMapping(i); + } + + /// Get the value mapping of the ith operand. + void setOperandMapping(unsigned i, const ValueMapping &ValMapping) { + getOperandMapping(i) = ValMapping; + } + + /// Check whether this object is valid. + /// This is a lightweight check for obvious wrong instance. + bool isValid() const { return getID() != InvalidMappingID; } + + /// Set the operand mapping for the \p OpIdx-th operand. + /// The mapping will consist of only one element in the break down list. + /// This element will map to \p RegBank and fully define a mask, whose + /// bitwidth matches the size of \p MaskSize. + void setOperandMapping(unsigned OpIdx, unsigned MaskSize, + const RegisterBank &RegBank); + + /// Verifiy that this mapping makes sense for \p MI. + /// \pre \p MI must be connected to a MachineFunction. 
+ /// + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True is the check was successful. + bool verify(const MachineInstr &MI) const; + + /// Print this on dbgs() stream. + void dump() const; + + /// Print this on \p OS; + void print(raw_ostream &OS) const; + }; + + /// Convenient type to represent the alternatives for mapping an + /// instruction. + /// \todo When we move to TableGen this should be an array ref. + typedef SmallVector<InstructionMapping, 4> InstructionMappings; + + /// Helper class use to get/create the virtual registers that will be used + /// to replace the MachineOperand when applying a mapping. + class OperandsMapper { + /// The OpIdx-th cell contains the index in NewVRegs where the VRegs of the + /// OpIdx-th operand starts. -1 means we do not have such mapping yet. + std::unique_ptr<int[]> OpToNewVRegIdx; + /// Hold the registers that will be used to map MI with InstrMapping. + SmallVector<unsigned, 8> NewVRegs; + /// Current MachineRegisterInfo, used to create new virtual registers. + MachineRegisterInfo &MRI; + /// Instruction being remapped. + MachineInstr &MI; + /// New mapping of the instruction. + const InstructionMapping &InstrMapping; + + /// Constant value identifying that the index in OpToNewVRegIdx + /// for an operand has not been set yet. + static const int DontKnowIdx; + + /// Get the range in NewVRegs to store all the partial + /// values for the \p OpIdx-th operand. + /// + /// \return The iterator range for the space created. + // + /// \pre getMI().getOperand(OpIdx).isReg() + iterator_range<SmallVectorImpl<unsigned>::iterator> + getVRegsMem(unsigned OpIdx); + + /// Get the end iterator for a range starting at \p StartIdx and + /// spannig \p NumVal in NewVRegs. 
+ /// \pre StartIdx + NumVal <= NewVRegs.size() + SmallVectorImpl<unsigned>::const_iterator + getNewVRegsEnd(unsigned StartIdx, unsigned NumVal) const; + SmallVectorImpl<unsigned>::iterator getNewVRegsEnd(unsigned StartIdx, + unsigned NumVal); + + public: + /// Create an OperandsMapper that will hold the information to apply \p + /// InstrMapping to \p MI. + /// \pre InstrMapping.verify(MI) + OperandsMapper(MachineInstr &MI, const InstructionMapping &InstrMapping, + MachineRegisterInfo &MRI); + + /// Getters. + /// @{ + /// The MachineInstr being remapped. + MachineInstr &getMI() const { return MI; } + + /// The final mapping of the instruction. + const InstructionMapping &getInstrMapping() const { return InstrMapping; } + /// @} + + /// Create as many new virtual registers as needed for the mapping of the \p + /// OpIdx-th operand. + /// The number of registers is determined by the number of breakdown for the + /// related operand in the instruction mapping. + /// + /// \pre getMI().getOperand(OpIdx).isReg() + /// + /// \post All the partial mapping of the \p OpIdx-th operand have been + /// assigned a new virtual register. + void createVRegs(unsigned OpIdx); + + /// Set the virtual register of the \p PartialMapIdx-th partial mapping of + /// the OpIdx-th operand to \p NewVReg. + /// + /// \pre getMI().getOperand(OpIdx).isReg() + /// \pre getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() > + /// PartialMapIdx + /// \pre NewReg != 0 + /// + /// \post the \p PartialMapIdx-th register of the value mapping of the \p + /// OpIdx-th operand has been set. + void setVRegs(unsigned OpIdx, unsigned PartialMapIdx, unsigned NewVReg); + + /// Get all the virtual registers required to map the \p OpIdx-th operand of + /// the instruction. + /// + /// This return an empty range when createVRegs or setVRegs has not been + /// called. + /// The iterator may be invalidated by a call to setVRegs or createVRegs. 
+ /// + /// When \p ForDebug is true, we will not check that the list of new virtual + /// registers does not contain uninitialized values. + /// + /// \pre getMI().getOperand(OpIdx).isReg() + /// \pre ForDebug || All partial mappings have been set a register + iterator_range<SmallVectorImpl<unsigned>::const_iterator> + getVRegs(unsigned OpIdx, bool ForDebug = false) const; + + /// Print this operands mapper on dbgs() stream. + void dump() const; + + /// Print this operands mapper on \p OS stream. + void print(raw_ostream &OS, bool ForDebug = false) const; + }; + +protected: + /// Hold the set of supported register banks. + std::unique_ptr<RegisterBank[]> RegBanks; + /// Total number of register banks. + unsigned NumRegBanks; + + /// Mapping from MVT::SimpleValueType to register banks. + std::unique_ptr<const RegisterBank *[]> VTToRegBank; + + /// Create a RegisterBankInfo that can accomodate up to \p NumRegBanks + /// RegisterBank instances. + /// + /// \note For the verify method to succeed all the \p NumRegBanks + /// must be initialized by createRegisterBank and updated with + /// addRegBankCoverage RegisterBank. + RegisterBankInfo(unsigned NumRegBanks); + + /// This constructor is meaningless. + /// It just provides a default constructor that can be used at link time + /// when GlobalISel is not built. + /// That way, targets can still inherit from this class without doing + /// crazy gymnastic to avoid link time failures. + /// \note That works because the constructor is inlined. + RegisterBankInfo() { + llvm_unreachable("This constructor should not be executed"); + } + + /// Create a new register bank with the given parameter and add it + /// to RegBanks. + /// \pre \p ID must not already be used. + /// \pre \p ID < NumRegBanks. + void createRegisterBank(unsigned ID, const char *Name); + + /// Add \p RCId to the set of register class that the register bank, + /// identified \p ID, covers. 
+ /// This method transitively adds all the sub classes and the subreg-classes + /// of \p RCId to the set of covered register classes. + /// It also adjusts the size of the register bank to reflect the maximal + /// size of a value that can be hold into that register bank. + /// + /// If \p AddTypeMapping is true, this method also records what types can + /// be mapped to \p ID. Although this done by default, targets may want to + /// disable it, espicially if a given type may be mapped on different + /// register bank. Indeed, in such case, this method only records the + /// first register bank where the type matches. + /// This information is only used to provide default mapping + /// (see getInstrMappingImpl). + /// + /// \note This method does *not* add the super classes of \p RCId. + /// The rationale is if \p ID covers the registers of \p RCId, that + /// does not necessarily mean that \p ID covers the set of registers + /// of RCId's superclasses. + /// This method does *not* add the superreg classes as well for consistents. + /// The expected use is to add the coverage top-down with respect to the + /// register hierarchy. + /// + /// \todo TableGen should just generate the BitSet vector for us. + void addRegBankCoverage(unsigned ID, unsigned RCId, + const TargetRegisterInfo &TRI, + bool AddTypeMapping = true); + + /// Get the register bank identified by \p ID. + RegisterBank &getRegBank(unsigned ID) { + assert(ID < getNumRegBanks() && "Accessing an unknown register bank"); + return RegBanks[ID]; + } + + /// Get the register bank that has been recorded to cover \p SVT. + const RegisterBank *getRegBankForType(MVT::SimpleValueType SVT) const { + if (!VTToRegBank) + return nullptr; + assert(SVT < MVT::SimpleValueType::LAST_VALUETYPE && "Out-of-bound access"); + return VTToRegBank.get()[SVT]; + } + + /// Record \p RegBank as the register bank that covers \p SVT. 
+ /// If a record was already set for \p SVT, the mapping is not + /// updated, unless \p Force == true + /// + /// \post if getRegBankForType(SVT)\@pre == nullptr then + /// getRegBankForType(SVT) == &RegBank + /// \post if Force == true then getRegBankForType(SVT) == &RegBank + void recordRegBankForType(const RegisterBank &RegBank, + MVT::SimpleValueType SVT, bool Force = false) { + if (!VTToRegBank) { + VTToRegBank.reset( + new const RegisterBank *[MVT::SimpleValueType::LAST_VALUETYPE]); + std::fill(&VTToRegBank[0], + &VTToRegBank[MVT::SimpleValueType::LAST_VALUETYPE], nullptr); + } + assert(SVT < MVT::SimpleValueType::LAST_VALUETYPE && "Out-of-bound access"); + // If we want to override the mapping or the mapping does not exits yet, + // set the register bank for SVT. + if (Force || !getRegBankForType(SVT)) + VTToRegBank.get()[SVT] = &RegBank; + } + + /// Try to get the mapping of \p MI. + /// See getInstrMapping for more details on what a mapping represents. + /// + /// Unlike getInstrMapping the returned InstructionMapping may be invalid + /// (isValid() == false). + /// This means that the target independent code is not smart enough + /// to get the mapping of \p MI and thus, the target has to provide the + /// information for \p MI. + /// + /// This implementation is able to get the mapping of: + /// - Target specific instructions by looking at the encoding constraints. + /// - Any instruction if all the register operands are already been assigned + /// a register, a register class, or a register bank. + /// - Copies and phis if at least one of the operand has been assigned a + /// register, a register class, or a register bank. + /// In other words, this method will likely fail to find a mapping for + /// any generic opcode that has not been lowered by target specific code. + InstructionMapping getInstrMappingImpl(const MachineInstr &MI) const; + + /// Get the register bank for the \p OpIdx-th operand of \p MI form + /// the encoding constraints, if any. 
+ /// + /// \return A register bank that covers the register class of the + /// related encoding constraints or nullptr if \p MI did not provide + /// enough information to deduce it. + const RegisterBank * + getRegBankFromConstraints(const MachineInstr &MI, unsigned OpIdx, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI) const; + + /// Helper method to apply something that is like the default mapping. + /// Basically, that means that \p OpdMapper.getMI() is left untouched + /// aside from the reassignment of the register operand that have been + /// remapped. + /// If the mapping of one of the operand spans several registers, this + /// method will abort as this is not like a default mapping anymore. + /// + /// \pre For OpIdx in {0..\p OpdMapper.getMI().getNumOperands()) + /// the range OpdMapper.getVRegs(OpIdx) is empty or of size 1. + static void applyDefaultMapping(const OperandsMapper &OpdMapper); + + /// See ::applyMapping. + virtual void applyMappingImpl(const OperandsMapper &OpdMapper) const { + llvm_unreachable("The target has to implement that part"); + } + +public: + virtual ~RegisterBankInfo() {} + + /// Get the register bank identified by \p ID. + const RegisterBank &getRegBank(unsigned ID) const { + return const_cast<RegisterBankInfo *>(this)->getRegBank(ID); + } + + /// Get the register bank of \p Reg. + /// If Reg has not been assigned a register, a register class, + /// or a register bank, then this returns nullptr. + /// + /// \pre Reg != 0 (NoRegister) + const RegisterBank *getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + /// Get the total number of register banks. + unsigned getNumRegBanks() const { return NumRegBanks; } + + /// Get a register bank that covers \p RC. + /// + /// \pre \p RC is a user-defined register class (as opposed as one + /// generated by TableGen). 
+ /// + /// \note The mapping RC -> RegBank could be built while adding the + /// coverage for the register banks. However, we do not do it, because, + /// at least for now, we only need this information for register classes + /// that are used in the description of instruction. In other words, + /// there are just a handful of them and we do not want to waste space. + /// + /// \todo This should be TableGen'ed. + virtual const RegisterBank & + getRegBankFromRegClass(const TargetRegisterClass &RC) const { + llvm_unreachable("The target must override this method"); + } + + /// Get the cost of a copy from \p B to \p A, or put differently, + /// get the cost of A = COPY B. Since register banks may cover + /// different size, \p Size specifies what will be the size in bits + /// that will be copied around. + /// + /// \note Since this is a copy, both registers have the same size. + virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, + unsigned Size) const { + // Optimistically assume that copies are coalesced. I.e., when + // they are on the same bank, they are free. + // Otherwise assume a non-zero cost of 1. The targets are supposed + // to override that properly anyway if they care. + return &A != &B; + } + + /// Identifier used when the related instruction mapping instance + /// is generated by target independent code. + /// Make sure not to use that identifier to avoid possible collision. + static const unsigned DefaultMappingID; + + /// Identifier used when the related instruction mapping instance + /// is generated by the default constructor. + /// Make sure not to use that identifier. + static const unsigned InvalidMappingID; + + /// Get the mapping of the different operands of \p MI + /// on the register bank. + /// This mapping should be the direct translation of \p MI. + /// In other words, when \p MI is mapped with the returned mapping, + /// only the register banks of the operands of \p MI need to be updated. 
+ /// In particular, neither the opcode or the type of \p MI needs to be + /// updated for this direct mapping. + /// + /// The target independent implementation gives a mapping based on + /// the register classes for the target specific opcode. + /// It uses the ID RegisterBankInfo::DefaultMappingID for that mapping. + /// Make sure you do not use that ID for the alternative mapping + /// for MI. See getInstrAlternativeMappings for the alternative + /// mappings. + /// + /// For instance, if \p MI is a vector add, the mapping should + /// not be a scalarization of the add. + /// + /// \post returnedVal.verify(MI). + /// + /// \note If returnedVal does not verify MI, this would probably mean + /// that the target does not support that instruction. + virtual InstructionMapping getInstrMapping(const MachineInstr &MI) const; + + /// Get the alternative mappings for \p MI. + /// Alternative in the sense different from getInstrMapping. + virtual InstructionMappings + getInstrAlternativeMappings(const MachineInstr &MI) const; + + /// Get the possible mapping for \p MI. + /// A mapping defines where the different operands may live and at what cost. + /// For instance, let us consider: + /// v0(16) = G_ADD <2 x i8> v1, v2 + /// The possible mapping could be: + /// + /// {/*ID*/VectorAdd, /*Cost*/1, /*v0*/{(0xFFFF, VPR)}, /*v1*/{(0xFFFF, VPR)}, + /// /*v2*/{(0xFFFF, VPR)}} + /// {/*ID*/ScalarAddx2, /*Cost*/2, /*v0*/{(0x00FF, GPR),(0xFF00, GPR)}, + /// /*v1*/{(0x00FF, GPR),(0xFF00, GPR)}, + /// /*v2*/{(0x00FF, GPR),(0xFF00, GPR)}} + /// + /// \note The first alternative of the returned mapping should be the + /// direct translation of \p MI current form. + /// + /// \post !returnedVal.empty(). + InstructionMappings getInstrPossibleMappings(const MachineInstr &MI) const; + + /// Apply \p OpdMapper.getInstrMapping() to \p OpdMapper.getMI(). + /// After this call \p OpdMapper.getMI() may not be valid anymore. 
+ /// \p OpdMapper.getInstrMapping().getID() carries the information of + /// what has been chosen to map \p OpdMapper.getMI(). This ID is set + /// by the various getInstrXXXMapping method. + /// + /// Therefore, getting the mapping and applying it should be kept in + /// sync. + void applyMapping(const OperandsMapper &OpdMapper) const { + // The only mapping we know how to handle is the default mapping. + if (OpdMapper.getInstrMapping().getID() == DefaultMappingID) + return applyDefaultMapping(OpdMapper); + // For other mapping, the target needs to do the right thing. + // If that means calling applyDefaultMapping, fine, but this + // must be explicitly stated. + applyMappingImpl(OpdMapper); + } + + /// Get the size in bits of \p Reg. + /// Utility method to get the size of any registers. Unlike + /// MachineRegisterInfo::getSize, the register does not need to be a + /// virtual register. + /// + /// \pre \p Reg != 0 (NoRegister). + static unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI); + + /// Check that information hold by this instance make sense for the + /// given \p TRI. + /// + /// \note This method does not check anything when assertions are disabled. + /// + /// \return True is the check was successful. + bool verify(const TargetRegisterInfo &TRI) const; +}; + +inline raw_ostream & +operator<<(raw_ostream &OS, + const RegisterBankInfo::PartialMapping &PartMapping) { + PartMapping.print(OS); + return OS; +} + +inline raw_ostream & +operator<<(raw_ostream &OS, const RegisterBankInfo::ValueMapping &ValMapping) { + ValMapping.print(OS); + return OS; +} + +inline raw_ostream & +operator<<(raw_ostream &OS, + const RegisterBankInfo::InstructionMapping &InstrMapping) { + InstrMapping.print(OS); + return OS; +} + +inline raw_ostream & +operator<<(raw_ostream &OS, const RegisterBankInfo::OperandsMapper &OpdMapper) { + OpdMapper.print(OS, /*ForDebug*/ false); + return OS; +} +} // End namespace llvm. 
+ +#endif diff --git a/include/llvm/CodeGen/GlobalISel/Types.h b/include/llvm/CodeGen/GlobalISel/Types.h new file mode 100644 index 0000000000000..7d974878d3b9f --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/Types.h @@ -0,0 +1,32 @@ +//===-- llvm/CodeGen/GlobalISel/Types.h - Types used by GISel ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes high level types that are used by several passes or +/// APIs involved in the GlobalISel pipeline. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_TYPES_H +#define LLVM_CODEGEN_GLOBALISEL_TYPES_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Value.h" + +namespace llvm { + +/// Map a value to a virtual register. +/// For now, we chose to map aggregate types to on single virtual +/// register. This might be revisited if it turns out to be inefficient. +/// PR26161 tracks that. +/// Note: We need to expose this type to the target hooks for thing like +/// ABI lowering that would be used during IRTranslation. +typedef DenseMap<const Value *, unsigned> ValueToVReg; + +} // End namespace llvm. +#endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 158ff3cd36a83..89cb7a86f99f6 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -257,6 +257,9 @@ namespace ISD { /// value as an integer 0/1 value. FGETSIGN, + /// Returns platform specific canonical encoding of a floating point number. + FCANONICALIZE, + /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the /// specified, possibly variable, elements. The number of elements is /// required to be a power of two. 
The types of the operands must all be @@ -483,6 +486,12 @@ namespace ISD { /// the same bit size (e.g. f32 <-> i32). This can also be used for /// int-to-int or fp-to-fp conversions, but that is a noop, deleted by /// getNode(). + /// + /// This operator is subtly different from the bitcast instruction from + /// LLVM-IR since this node may change the bits in the register. For + /// example, this occurs on big-endian NEON and big-endian MSA where the + /// layout of the bits in the register depends on the vector type and this + /// operator acts as a shuffle operation for some vector type combinations. BITCAST, /// ADDRSPACECAST - This operator converts between pointers of different @@ -869,56 +878,52 @@ namespace ISD { SETCC_INVALID // Marker value. }; - /// isSignedIntSetCC - Return true if this is a setcc instruction that - /// performs a signed comparison when used with integer operands. + /// Return true if this is a setcc instruction that performs a signed + /// comparison when used with integer operands. inline bool isSignedIntSetCC(CondCode Code) { return Code == SETGT || Code == SETGE || Code == SETLT || Code == SETLE; } - /// isUnsignedIntSetCC - Return true if this is a setcc instruction that - /// performs an unsigned comparison when used with integer operands. + /// Return true if this is a setcc instruction that performs an unsigned + /// comparison when used with integer operands. inline bool isUnsignedIntSetCC(CondCode Code) { return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE; } - /// isTrueWhenEqual - Return true if the specified condition returns true if - /// the two operands to the condition are equal. Note that if one of the two - /// operands is a NaN, this value is meaningless. + /// Return true if the specified condition returns true if the two operands to + /// the condition are equal. Note that if one of the two operands is a NaN, + /// this value is meaningless. 
inline bool isTrueWhenEqual(CondCode Cond) { return ((int)Cond & 1) != 0; } - /// getUnorderedFlavor - This function returns 0 if the condition is always - /// false if an operand is a NaN, 1 if the condition is always true if the - /// operand is a NaN, and 2 if the condition is undefined if the operand is a - /// NaN. + /// This function returns 0 if the condition is always false if an operand is + /// a NaN, 1 if the condition is always true if the operand is a NaN, and 2 if + /// the condition is undefined if the operand is a NaN. inline unsigned getUnorderedFlavor(CondCode Cond) { return ((int)Cond >> 3) & 3; } - /// getSetCCInverse - Return the operation corresponding to !(X op Y), where - /// 'op' is a valid SetCC operation. + /// Return the operation corresponding to !(X op Y), where 'op' is a valid + /// SetCC operation. CondCode getSetCCInverse(CondCode Operation, bool isInteger); - /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) - /// when given the operation for (X op Y). + /// Return the operation corresponding to (Y op X) when given the operation + /// for (X op Y). CondCode getSetCCSwappedOperands(CondCode Operation); - /// getSetCCOrOperation - Return the result of a logical OR between different - /// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This - /// function returns SETCC_INVALID if it is not possible to represent the - /// resultant comparison. + /// Return the result of a logical OR between different comparisons of + /// identical values: ((X op1 Y) | (X op2 Y)). This function returns + /// SETCC_INVALID if it is not possible to represent the resultant comparison. CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger); - /// getSetCCAndOperation - Return the result of a logical AND between - /// different comparisons of identical values: ((X op1 Y) & (X op2 Y)). This - /// function returns SETCC_INVALID if it is not possible to represent the - /// resultant comparison. 
+ /// Return the result of a logical AND between different comparisons of + /// identical values: ((X op1 Y) & (X op2 Y)). This function returns + /// SETCC_INVALID if it is not possible to represent the resultant comparison. CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger); //===--------------------------------------------------------------------===// - /// CvtCode enum - This enum defines the various converts CONVERT_RNDSAT - /// supports. + /// This enum defines the various converts CONVERT_RNDSAT supports. enum CvtCode { CVT_FF, /// Float from Float CVT_FS, /// Float from Signed diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index f1ea2c03f13cc..04e840dea2caa 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -190,8 +190,8 @@ namespace llvm { void dump() const; }; - typedef SmallVector<Segment,4> Segments; - typedef SmallVector<VNInfo*,4> VNInfoList; + typedef SmallVector<Segment, 2> Segments; + typedef SmallVector<VNInfo *, 2> VNInfoList; Segments segments; // the liveness segments VNInfoList valnos; // value#'s @@ -613,6 +613,9 @@ namespace llvm { BumpPtrAllocator &Allocator) : LiveRange(Other, Allocator), Next(nullptr), LaneMask(LaneMask) { } + + void print(raw_ostream &OS) const; + void dump() const; }; private: @@ -712,10 +715,6 @@ namespace llvm { /// are not considered valid and should only exist temporarily). void removeEmptySubRanges(); - /// Construct main live range by merging the SubRanges of @p LI. - void constructMainRangeFromSubranges(const SlotIndexes &Indexes, - VNInfo::Allocator &VNIAllocator); - /// getSize - Returns the sum of sizes of all the LiveRange's. 
/// unsigned getSize() const; @@ -759,6 +758,12 @@ namespace llvm { void freeSubRange(SubRange *S); }; + inline raw_ostream &operator<<(raw_ostream &OS, + const LiveInterval::SubRange &SR) { + SR.print(OS); + return OS; + } + inline raw_ostream &operator<<(raw_ostream &OS, const LiveInterval &LI) { LI.print(OS); return OS; @@ -868,6 +873,5 @@ namespace llvm { void Distribute(LiveInterval &LI, LiveInterval *LIV[], MachineRegisterInfo &MRI); }; - } #endif diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index 87421e2f83b4a..d4ee0582cc412 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -31,7 +31,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetRegisterInfo.h" #include <cmath> -#include <iterator> namespace llvm { @@ -105,7 +104,7 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; // Calculate the spill weight to assign to a single instruction. static float getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, - const MachineInstr *Instr); + const MachineInstr &Instr); LiveInterval &getInterval(unsigned Reg) { if (hasInterval(Reg)) @@ -145,7 +144,7 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; /// Given a register and an instruction, adds a live segment from that /// instruction to the end of its MBB. LiveInterval::Segment addSegmentToEndOfBlock(unsigned reg, - MachineInstr* startInst); + MachineInstr &startInst); /// After removing some uses of a register, shrink its live range to just /// the remaining uses. This method does not compute reaching defs for new @@ -195,13 +194,13 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; /// isNotInMIMap - returns true if the specified machine instr has been /// removed or was never entered in the map. 
- bool isNotInMIMap(const MachineInstr* Instr) const { + bool isNotInMIMap(const MachineInstr &Instr) const { return !Indexes->hasIndex(Instr); } /// Returns the base index of the given instruction. - SlotIndex getInstructionIndex(const MachineInstr *instr) const { - return Indexes->getInstructionIndex(instr); + SlotIndex getInstructionIndex(const MachineInstr &Instr) const { + return Indexes->getInstructionIndex(Instr); } /// Returns the instruction associated with the given index. @@ -240,21 +239,21 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; RegMaskBlocks.push_back(std::make_pair(RegMaskSlots.size(), 0)); } - SlotIndex InsertMachineInstrInMaps(MachineInstr *MI) { + SlotIndex InsertMachineInstrInMaps(MachineInstr &MI) { return Indexes->insertMachineInstrInMaps(MI); } void InsertMachineInstrRangeInMaps(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E) { for (MachineBasicBlock::iterator I = B; I != E; ++I) - Indexes->insertMachineInstrInMaps(I); + Indexes->insertMachineInstrInMaps(*I); } - void RemoveMachineInstrFromMaps(MachineInstr *MI) { + void RemoveMachineInstrFromMaps(MachineInstr &MI) { Indexes->removeMachineInstrFromMaps(MI); } - void ReplaceMachineInstrInMaps(MachineInstr *MI, MachineInstr *NewMI) { + void ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI) { Indexes->replaceMachineInstrInMaps(MI, NewMI); } @@ -288,7 +287,7 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; /// are not supported. /// /// \param UpdateFlags Update live intervals for nonallocatable physregs. - void handleMove(MachineInstr* MI, bool UpdateFlags = false); + void handleMove(MachineInstr &MI, bool UpdateFlags = false); /// moveIntoBundle - Update intervals for operands of MI so that they /// begin/end on the SlotIndex for BundleStart. @@ -298,7 +297,7 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; /// Requires MI and BundleStart to have SlotIndexes, and assumes /// existing liveness is accurate. 
BundleStart should be the first /// instruction in the Bundle. - void handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart, + void handleMoveIntoBundle(MachineInstr &MI, MachineInstr &BundleStart, bool UpdateFlags = false); /// repairIntervalsInRange - Update live intervals for instructions in a @@ -406,6 +405,11 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs; void splitSeparateComponents(LiveInterval &LI, SmallVectorImpl<LiveInterval*> &SplitLIs); + /// For live interval \p LI with correct SubRanges construct matching + /// information for the main live range. Expects the main live range to not + /// have any segments or value numbers. + void constructMainRangeFromSubranges(LiveInterval &LI); + private: /// Compute live intervals for all virtual registers. void computeVirtRegs(); diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index 3bdf5ae8d0132..1cea9d5b90d64 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -84,12 +84,8 @@ public: void removeReg(unsigned Reg) { assert(TRI && "LivePhysRegs is not initialized."); assert(Reg <= TRI->getNumRegs() && "Expected a physical register."); - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.erase(*SubRegs); - for (MCSuperRegIterator SuperRegs(Reg, TRI, /*IncludeSelf=*/false); - SuperRegs.isValid(); ++SuperRegs) - LiveRegs.erase(*SuperRegs); + for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) + LiveRegs.erase(*R); } /// \brief Removes physical registers clobbered by the regmask operand @p MO. @@ -97,10 +93,15 @@ public: SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers); /// \brief Returns true if register @p Reg is contained in the set. This also - /// works if only the super register of @p Reg has been defined, because we - /// always add also all sub-registers to the set. 
+ /// works if only the super register of @p Reg has been defined, because + /// addReg() always adds all sub-registers to the set as well. + /// Note: Returns false if just some sub registers are live, use available() + /// when searching a free register. bool contains(unsigned Reg) const { return LiveRegs.count(Reg); } + /// Returns true if register \p Reg and no aliasing register is in the set. + bool available(const MachineRegisterInfo &MRI, unsigned Reg) const; + /// \brief Simulates liveness when stepping backwards over an /// instruction(bundle): Remove Defs, add uses. This is the recommended way of /// calculating liveness. @@ -116,15 +117,20 @@ public: void stepForward(const MachineInstr &MI, SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers); - /// \brief Adds all live-in registers of basic block @p MBB; After prologue/ - /// epilogue insertion \p AddPristines should be set to true to insert the + /// Adds all live-in registers of basic block @p MBB. + /// Live in registers are the registers in the blocks live-in list and the /// pristine registers. - void addLiveIns(const MachineBasicBlock *MBB, bool AddPristines = false); + void addLiveIns(const MachineBasicBlock &MBB); + + /// Adds all live-out registers of basic block @p MBB. + /// Live out registers are the union of the live-in registers of the successor + /// blocks and pristine registers. Live out registers of the end block are the + /// callee saved registers. + void addLiveOuts(const MachineBasicBlock &MBB); - /// \brief Adds all live-out registers of basic block @p MBB; After prologue/ - /// epilogue insertion \p AddPristinesAndCSRs should be set to true. - void addLiveOuts(const MachineBasicBlock *MBB, - bool AddPristinesAndCSRs = false); + /// Like addLiveOuts() but does not add pristine registers/callee saved + /// registers. 
+ void addLiveOutsNoPristines(const MachineBasicBlock &MBB); typedef SparseSet<unsigned>::const_iterator const_iterator; const_iterator begin() const { return LiveRegs.begin(); } diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index 2271e3352aa27..4250777682ba5 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -72,6 +72,10 @@ private: /// ScannedRemattable - true when remattable values have been identified. bool ScannedRemattable; + /// DeadRemats - The saved instructions which have already been dead after + /// rematerialization but not deleted yet -- to be done in postOptimization. + SmallPtrSet<MachineInstr *, 32> *DeadRemats; + /// Remattable - Values defined by remattable instructions as identified by /// tii.isTriviallyReMaterializable(). SmallPtrSet<const VNInfo*,4> Remattable; @@ -96,7 +100,8 @@ private: SmallVector<LiveInterval*, 8>, SmallPtrSet<LiveInterval*, 8> > ToShrinkSet; /// Helper for eliminateDeadDefs. - void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink); + void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, + AliasAnalysis *AA); /// MachineRegisterInfo callback to notify when new virtual /// registers are created. @@ -116,13 +121,16 @@ public: /// @param vrm Map of virtual registers to physical registers for this /// function. If NULL, no virtual register map updates will /// be done. This could be the case if called before Regalloc. + /// @param deadRemats The collection of all the instructions defining an + /// original reg and are dead after remat. 
LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<unsigned> &newRegs, MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm, - Delegate *delegate = nullptr) + Delegate *delegate = nullptr, + SmallPtrSet<MachineInstr *, 32> *deadRemats = nullptr) : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis), - VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), - TheDelegate(delegate), FirstNew(newRegs.size()), - ScannedRemattable(false) { + VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate), + FirstNew(newRegs.size()), ScannedRemattable(false), + DeadRemats(deadRemats) { MRI.setDelegate(this); } @@ -142,6 +150,16 @@ public: bool empty() const { return size() == 0; } unsigned get(unsigned idx) const { return NewRegs[idx+FirstNew]; } + /// pop_back - It allows LiveRangeEdit users to drop new registers. + /// The context is when an original def instruction of a register is + /// dead after rematerialization, we still want to keep it for following + /// rematerializations. We save the def instruction in DeadRemats, + /// and replace the original dst register with a new dummy register so + /// the live range of original dst register can be shrinked normally. + /// We don't want to allocate phys register for the dummy register, so + /// we want to drop it from the NewRegs set. + void pop_back() { NewRegs.pop_back(); } + ArrayRef<unsigned> regs() const { return makeArrayRef(NewRegs).slice(FirstNew); } @@ -175,15 +193,15 @@ public: /// Remat - Information needed to rematerialize at a specific location. struct Remat { VNInfo *ParentVNI; // parent_'s value at the remat location. - MachineInstr *OrigMI; // Instruction defining ParentVNI. + MachineInstr *OrigMI; // Instruction defining OrigVNI. It contains the + // real expr for remat. explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(nullptr) {} }; /// canRematerializeAt - Determine if ParentVNI can be rematerialized at /// UseIdx. 
It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI. /// When cheapAsAMove is set, only cheap remats are allowed. - bool canRematerializeAt(Remat &RM, - SlotIndex UseIdx, + bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx, bool cheapAsAMove); /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an @@ -208,6 +226,12 @@ public: return Rematted.count(ParentVNI); } + void markDeadRemat(MachineInstr *inst) { + // DeadRemats is an optional field. + if (DeadRemats) + DeadRemats->insert(inst); + } + /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try /// to erase it from LIS. void eraseVirtReg(unsigned Reg); @@ -218,8 +242,9 @@ public: /// RegsBeingSpilled lists registers currently being spilled by the register /// allocator. These registers should not be split into new intervals /// as currently those new intervals are not guaranteed to spill. - void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - ArrayRef<unsigned> RegsBeingSpilled = None); + void eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, + ArrayRef<unsigned> RegsBeingSpilled = None, + AliasAnalysis *AA = nullptr); /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h index 55b97dc3e71d9..bc210dda08c09 100644 --- a/include/llvm/CodeGen/LiveVariables.h +++ b/include/llvm/CodeGen/LiveVariables.h @@ -91,9 +91,9 @@ public: /// removeKill - Delete a kill corresponding to the specified /// machine instruction. Returns true if there was a kill /// corresponding to this instruction, false otherwise. 
- bool removeKill(MachineInstr *MI) { - std::vector<MachineInstr*>::iterator - I = std::find(Kills.begin(), Kills.end(), MI); + bool removeKill(MachineInstr &MI) { + std::vector<MachineInstr *>::iterator I = + std::find(Kills.begin(), Kills.end(), &MI); if (I == Kills.end()) return false; Kills.erase(I); @@ -155,10 +155,10 @@ private: // Intermediate data structures /// HandleRegMask - Call HandlePhysRegKill for all registers clobbered by Mask. void HandleRegMask(const MachineOperand&); - void HandlePhysRegUse(unsigned Reg, MachineInstr *MI); + void HandlePhysRegUse(unsigned Reg, MachineInstr &MI); void HandlePhysRegDef(unsigned Reg, MachineInstr *MI, SmallVectorImpl<unsigned> &Defs); - void UpdatePhysRegDefs(MachineInstr *MI, SmallVectorImpl<unsigned> &Defs); + void UpdatePhysRegDefs(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs); /// FindLastRefOrPartRef - Return the last reference or partial reference of /// the specified register. @@ -176,7 +176,7 @@ private: // Intermediate data structures /// is coming from. void analyzePHINodes(const MachineFunction& Fn); - void runOnInstr(MachineInstr *MI, SmallVectorImpl<unsigned> &Defs); + void runOnInstr(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs); void runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs); public: @@ -185,37 +185,37 @@ public: /// RegisterDefIsDead - Return true if the specified instruction defines the /// specified register, but that definition is dead. - bool RegisterDefIsDead(MachineInstr *MI, unsigned Reg) const; + bool RegisterDefIsDead(MachineInstr &MI, unsigned Reg) const; //===--------------------------------------------------------------------===// // API to update live variable information /// replaceKillInstruction - Update register kill info by replacing a kill /// instruction with a new one. 
- void replaceKillInstruction(unsigned Reg, MachineInstr *OldMI, - MachineInstr *NewMI); + void replaceKillInstruction(unsigned Reg, MachineInstr &OldMI, + MachineInstr &NewMI); /// addVirtualRegisterKilled - Add information about the fact that the /// specified register is killed after being used by the specified /// instruction. If AddIfNotFound is true, add a implicit operand if it's /// not found. - void addVirtualRegisterKilled(unsigned IncomingReg, MachineInstr *MI, + void addVirtualRegisterKilled(unsigned IncomingReg, MachineInstr &MI, bool AddIfNotFound = false) { - if (MI->addRegisterKilled(IncomingReg, TRI, AddIfNotFound)) - getVarInfo(IncomingReg).Kills.push_back(MI); + if (MI.addRegisterKilled(IncomingReg, TRI, AddIfNotFound)) + getVarInfo(IncomingReg).Kills.push_back(&MI); } /// removeVirtualRegisterKilled - Remove the specified kill of the virtual /// register from the live variable information. Returns true if the /// variable was marked as killed by the specified instruction, /// false otherwise. - bool removeVirtualRegisterKilled(unsigned reg, MachineInstr *MI) { + bool removeVirtualRegisterKilled(unsigned reg, MachineInstr &MI) { if (!getVarInfo(reg).removeKill(MI)) return false; bool Removed = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isKill() && MO.getReg() == reg) { MO.setIsKill(false); Removed = true; @@ -230,28 +230,28 @@ public: /// removeVirtualRegistersKilled - Remove all killed info for the specified /// instruction. - void removeVirtualRegistersKilled(MachineInstr *MI); + void removeVirtualRegistersKilled(MachineInstr &MI); /// addVirtualRegisterDead - Add information about the fact that the specified /// register is dead after being used by the specified instruction. If /// AddIfNotFound is true, add a implicit operand if it's not found. 
- void addVirtualRegisterDead(unsigned IncomingReg, MachineInstr *MI, + void addVirtualRegisterDead(unsigned IncomingReg, MachineInstr &MI, bool AddIfNotFound = false) { - if (MI->addRegisterDead(IncomingReg, TRI, AddIfNotFound)) - getVarInfo(IncomingReg).Kills.push_back(MI); + if (MI.addRegisterDead(IncomingReg, TRI, AddIfNotFound)) + getVarInfo(IncomingReg).Kills.push_back(&MI); } /// removeVirtualRegisterDead - Remove the specified kill of the virtual /// register from the live variable information. Returns true if the /// variable was marked dead at the specified instruction, false /// otherwise. - bool removeVirtualRegisterDead(unsigned reg, MachineInstr *MI) { + bool removeVirtualRegisterDead(unsigned reg, MachineInstr &MI) { if (!getVarInfo(reg).removeKill(MI)) return false; bool Removed = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef() && MO.getReg() == reg) { MO.setIsDead(false); Removed = true; @@ -278,9 +278,8 @@ public: void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock, MachineBasicBlock *BB, std::vector<MachineBasicBlock*> &WorkList); - void HandleVirtRegDef(unsigned reg, MachineInstr *MI); - void HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, - MachineInstr *MI); + void HandleVirtRegDef(unsigned reg, MachineInstr &MI); + void HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, MachineInstr &MI); bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB) { return getVarInfo(Reg).isLiveIn(MBB, Reg, *MRI); diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h index a569d5ec1f5e8..dd0780397f429 100644 --- a/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -18,7 +18,6 @@ #ifndef LLVM_CODEGEN_MIRPARSER_MIRPARSER_H #define 
LLVM_CODEGEN_MIRPARSER_MIRPARSER_H -#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineFunctionInitializer.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" @@ -26,6 +25,7 @@ namespace llvm { +class StringRef; class MIRParserImpl; class SMDiagnostic; diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 14d3744741c5c..7f9c448333362 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -7,9 +7,6 @@ // //===----------------------------------------------------------------------===// // -// The MIR serialization library is currently a work in progress. It can't -// serialize machine functions at this time. -// // This file implements the mapping between various MIR data structures and // their corresponding YAML representation. // @@ -385,6 +382,8 @@ struct MachineFunction { unsigned Alignment = 0; bool ExposesReturnsTwice = false; bool HasInlineAsm = false; + // MachineFunctionProperties + bool AllVRegsAllocated = false; // Register information bool IsSSA = false; bool TracksRegLiveness = false; @@ -408,6 +407,7 @@ template <> struct MappingTraits<MachineFunction> { YamlIO.mapOptional("alignment", MF.Alignment); YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice); YamlIO.mapOptional("hasInlineAsm", MF.HasInlineAsm); + YamlIO.mapOptional("allVRegsAllocated", MF.AllVRegsAllocated); YamlIO.mapOptional("isSSA", MF.IsSSA); YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness); YamlIO.mapOptional("tracksSubRegLiveness", MF.TracksSubRegLiveness); diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 3d58c499823e8..d5f918eb2bb90 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -15,6 +15,8 @@ #define LLVM_CODEGEN_MACHINEBASICBLOCK_H #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/iterator_range.h" +#include 
"llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/BranchProbability.h" #include "llvm/MC/MCRegisterInfo.h" @@ -157,80 +159,14 @@ public: const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - /// MachineBasicBlock iterator that automatically skips over MIs that are - /// inside bundles (i.e. walk top level MIs only). - template<typename Ty, typename IterTy> - class bundle_iterator - : public std::iterator<std::bidirectional_iterator_tag, Ty, ptrdiff_t> { - IterTy MII; - - public: - bundle_iterator(IterTy MI) : MII(MI) {} - - bundle_iterator(Ty &MI) : MII(MI) { - assert(!MI.isBundledWithPred() && - "It's not legal to initialize bundle_iterator with a bundled MI"); - } - bundle_iterator(Ty *MI) : MII(MI) { - assert((!MI || !MI->isBundledWithPred()) && - "It's not legal to initialize bundle_iterator with a bundled MI"); - } - // Template allows conversion from const to nonconst. - template<class OtherTy, class OtherIterTy> - bundle_iterator(const bundle_iterator<OtherTy, OtherIterTy> &I) - : MII(I.getInstrIterator()) {} - bundle_iterator() : MII(nullptr) {} - - Ty &operator*() const { return *MII; } - Ty *operator->() const { return &operator*(); } - - operator Ty *() const { return MII.getNodePtrUnchecked(); } - - bool operator==(const bundle_iterator &X) const { - return MII == X.MII; - } - bool operator!=(const bundle_iterator &X) const { - return !operator==(X); - } - - // Increment and decrement operators... - bundle_iterator &operator--() { // predecrement - Back up - do --MII; - while (MII->isBundledWithPred()); - return *this; - } - bundle_iterator &operator++() { // preincrement - Advance - while (MII->isBundledWithSucc()) - ++MII; - ++MII; - return *this; - } - bundle_iterator operator--(int) { // postdecrement operators... - bundle_iterator tmp = *this; - --*this; - return tmp; - } - bundle_iterator operator++(int) { // postincrement operators... 
- bundle_iterator tmp = *this; - ++*this; - return tmp; - } - - IterTy getInstrIterator() const { - return MII; - } - }; - typedef Instructions::iterator instr_iterator; typedef Instructions::const_iterator const_instr_iterator; typedef std::reverse_iterator<instr_iterator> reverse_instr_iterator; typedef std::reverse_iterator<const_instr_iterator> const_reverse_instr_iterator; - typedef - bundle_iterator<MachineInstr,instr_iterator> iterator; - typedef - bundle_iterator<const MachineInstr,const_instr_iterator> const_iterator; + typedef MachineInstrBundleIterator<MachineInstr> iterator; + typedef MachineInstrBundleIterator<const MachineInstr> const_iterator; typedef std::reverse_iterator<const_iterator> const_reverse_iterator; typedef std::reverse_iterator<iterator> reverse_iterator; @@ -257,6 +193,13 @@ public: reverse_instr_iterator instr_rend () { return Insts.rend(); } const_reverse_instr_iterator instr_rend () const { return Insts.rend(); } + typedef iterator_range<instr_iterator> instr_range; + typedef iterator_range<const_instr_iterator> const_instr_range; + instr_range instrs() { return instr_range(instr_begin(), instr_end()); } + const_instr_range instrs() const { + return const_instr_range(instr_begin(), instr_end()); + } + iterator begin() { return instr_begin(); } const_iterator begin() const { return instr_begin(); } iterator end () { return instr_end(); } @@ -399,10 +342,6 @@ public: /// via an exception handler. void setIsEHPad(bool V = true) { IsEHPad = V; } - /// If this block has a successor that is a landing pad, return it. Otherwise - /// return NULL. - const MachineBasicBlock *getLandingPadSuccessor() const; - bool hasEHPadSuccessor() const; /// Returns true if this is the entry block of an EH funclet. @@ -563,7 +502,13 @@ public: /// /// This function updates LiveVariables, MachineDominatorTree, and /// MachineLoopInfo, as applicable. 
- MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P); + MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P); + + /// Check if the edge between this block and the given successor \p + /// Succ, can be split. If this returns true a subsequent call to + /// SplitCriticalEdge is guaranteed to return a valid basic block if + /// no changes occured in the meantime. + bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const; void pop_front() { Insts.pop_front(); } void pop_back() { Insts.pop_back(); } @@ -729,9 +674,9 @@ public: // Debugging methods. void dump() const; - void print(raw_ostream &OS, SlotIndexes* = nullptr) const; + void print(raw_ostream &OS, const SlotIndexes* = nullptr) const; void print(raw_ostream &OS, ModuleSlotTracker &MST, - SlotIndexes * = nullptr) const; + const SlotIndexes* = nullptr) const; // Printing method used by LoopInfo. void printAsOperand(raw_ostream &OS, bool PrintType = true) const; diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index feb394e7a69e0..7a236086ed09c 100644 --- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_MACHINEBLOCKFREQUENCYINFO_H #define LLVM_CODEGEN_MACHINEBLOCKFREQUENCYINFO_H +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/BlockFrequency.h" #include <climits> @@ -50,7 +51,10 @@ public: /// BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; + Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const; + const MachineFunction *getFunction() const; + const MachineBranchProbabilityInfo *getMBPI() const; void view() const; // Print the block frequency Freq to OS using the current functions entry diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h index 
f3891227746fe..11238016d447b 100644 --- a/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/include/llvm/CodeGen/MachineCombinerPattern.h @@ -38,7 +38,40 @@ enum class MachineCombinerPattern { MULSUBX_OP1, MULSUBX_OP2, MULADDXI_OP1, - MULSUBXI_OP1 + MULSUBXI_OP1, + // Floating Point + FMULADDS_OP1, + FMULADDS_OP2, + FMULSUBS_OP1, + FMULSUBS_OP2, + FMULADDD_OP1, + FMULADDD_OP2, + FMULSUBD_OP1, + FMULSUBD_OP2, + FMLAv1i32_indexed_OP1, + FMLAv1i32_indexed_OP2, + FMLAv1i64_indexed_OP1, + FMLAv1i64_indexed_OP2, + FMLAv2f32_OP2, + FMLAv2f32_OP1, + FMLAv2f64_OP1, + FMLAv2f64_OP2, + FMLAv2i32_indexed_OP1, + FMLAv2i32_indexed_OP2, + FMLAv2i64_indexed_OP1, + FMLAv2i64_indexed_OP2, + FMLAv4f32_OP1, + FMLAv4f32_OP2, + FMLAv4i32_indexed_OP1, + FMLAv4i32_indexed_OP2, + FMLSv1i32_indexed_OP2, + FMLSv1i64_indexed_OP2, + FMLSv2i32_indexed_OP2, + FMLSv2i64_indexed_OP2, + FMLSv2f32_OP2, + FMLSv2f64_OP2, + FMLSv4i32_indexed_OP2, + FMLSv4f32_OP2 }; } // end namespace llvm diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index a69936f6e267c..ed7cc277e8b6a 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -216,6 +216,8 @@ public: void releaseMemory() override; + void verifyAnalysis() const override; + void print(raw_ostream &OS, const Module*) const override; /// \brief Record that the critical edge (FromBB, ToBB) has been @@ -239,6 +241,27 @@ public: "A basic block inserted via edge splitting cannot appear twice"); CriticalEdgesToSplit.push_back({FromBB, ToBB, NewBB}); } + + /// \brief Returns *false* if the other dominator tree matches this dominator + /// tree. 
+ inline bool compare(const MachineDominatorTree &Other) const { + const MachineDomTreeNode *R = getRootNode(); + const MachineDomTreeNode *OtherR = Other.getRootNode(); + + if (!R || !OtherR || R->getBlock() != OtherR->getBlock()) + return true; + + if (DT->compare(*Other.DT)) + return true; + + return false; + } + + /// \brief Verify the correctness of the domtree by re-computing it. + /// + /// This should only be used for debugging as it aborts the program if the + /// verification fails. + void verifyDomTree() const; }; //===------------------------------------- diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index e50779aacc23e..59755674c69e5 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -122,21 +122,28 @@ class MachineFrameInfo { // arguments have ABI-prescribed offsets). bool isAliased; + /// If true, the object has been zero-extended. + bool isZExt; + + /// If true, the object has been zero-extended. + bool isSExt; + StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, bool isSS, const AllocaInst *Val, bool A) : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), isSpillSlot(isSS), isStatepointSpillSlot(false), Alloca(Val), - PreAllocated(false), isAliased(A) {} + PreAllocated(false), isAliased(A), isZExt(false), isSExt(false) {} }; /// The alignment of the stack. unsigned StackAlignment; - /// Can the stack be realigned. - /// Targets that set this to false don't have the ability to overalign - /// their stack frame, and thus, overaligned allocas are all treated - /// as dynamic allocations and the target must handle them as part - /// of DYNAMIC_STACKALLOC lowering. + /// Can the stack be realigned. This can be false if the target does not + /// support stack realignment, or if the user asks us not to realign the + /// stack. 
In this situation, overaligned allocas are all treated as dynamic + /// allocations and the target must handle them as part of DYNAMIC_STACKALLOC + /// lowering. All non-alloca stack objects have their alignment clamped to the + /// base ABI stack alignment. /// FIXME: There is room for improvement in this case, in terms of /// grouping overaligned allocas into a "secondary stack frame" and /// then only use a single alloca to allocate this frame and only a @@ -145,39 +152,42 @@ class MachineFrameInfo { /// realignment. bool StackRealignable; + /// Whether the function has the \c alignstack attribute. + bool ForcedRealign; + /// The list of stack objects allocated. std::vector<StackObject> Objects; /// This contains the number of fixed objects contained on /// the stack. Because fixed objects are stored at a negative index in the /// Objects list, this is also the index to the 0th object in the list. - unsigned NumFixedObjects; + unsigned NumFixedObjects = 0; /// This boolean keeps track of whether any variable /// sized objects have been allocated yet. - bool HasVarSizedObjects; + bool HasVarSizedObjects = false; /// This boolean keeps track of whether there is a call /// to builtin \@llvm.frameaddress. - bool FrameAddressTaken; + bool FrameAddressTaken = false; /// This boolean keeps track of whether there is a call /// to builtin \@llvm.returnaddress. - bool ReturnAddressTaken; + bool ReturnAddressTaken = false; /// This boolean keeps track of whether there is a call /// to builtin \@llvm.experimental.stackmap. - bool HasStackMap; + bool HasStackMap = false; /// This boolean keeps track of whether there is a call /// to builtin \@llvm.experimental.patchpoint. - bool HasPatchPoint; + bool HasPatchPoint = false; /// The prolog/epilog code inserter calculates the final stack /// offsets for all of the fixed size objects, updating the Objects list /// above. 
It then updates StackSize to contain the number of bytes that need /// to be allocated on entry to the function. - uint64_t StackSize; + uint64_t StackSize = 0; /// The amount that a frame offset needs to be adjusted to /// have the actual offset from the stack/frame pointer. The exact usage of @@ -188,7 +198,7 @@ class MachineFrameInfo { /// targets, this value is only used when generating debug info (via /// TargetRegisterInfo::getFrameIndexReference); when generating code, the /// corresponding adjustments are performed directly. - int OffsetAdjustment; + int OffsetAdjustment = 0; /// The prolog/epilog code inserter may process objects that require greater /// alignment than the default alignment the target provides. @@ -197,27 +207,27 @@ class MachineFrameInfo { /// native alignment maintained by the compiler, dynamic alignment code will /// be needed. /// - unsigned MaxAlignment; + unsigned MaxAlignment = 0; /// Set to true if this function adjusts the stack -- e.g., /// when calling another function. This is only valid during and after /// prolog/epilog code insertion. - bool AdjustsStack; + bool AdjustsStack = false; /// Set to true if this function has any function calls. - bool HasCalls; + bool HasCalls = false; /// The frame index for the stack protector. - int StackProtectorIdx; + int StackProtectorIdx = -1; /// The frame index for the function context. Used for SjLj exceptions. - int FunctionContextIdx; + int FunctionContextIdx = -1; /// This contains the size of the largest call frame if the target uses frame /// setup/destroy pseudo instructions (as defined in the TargetFrameInfo /// class). This information is important for frame pointer elimination. /// It is only valid during and after prolog/epilog code insertion. - unsigned MaxCallFrameSize; + unsigned MaxCallFrameSize = 0; /// The prolog/epilog code inserter fills in this vector with each /// callee saved register saved in the frame. 
Beyond its use by the prolog/ @@ -226,79 +236,53 @@ class MachineFrameInfo { std::vector<CalleeSavedInfo> CSInfo; /// Has CSInfo been set yet? - bool CSIValid; + bool CSIValid = false; /// References to frame indices which are mapped /// into the local frame allocation block. <FrameIdx, LocalOffset> SmallVector<std::pair<int, int64_t>, 32> LocalFrameObjects; /// Size of the pre-allocated local frame block. - int64_t LocalFrameSize; + int64_t LocalFrameSize = 0; /// Required alignment of the local object blob, which is the strictest /// alignment of any object in it. - unsigned LocalFrameMaxAlign; + unsigned LocalFrameMaxAlign = 0; /// Whether the local object blob needs to be allocated together. If not, /// PEI should ignore the isPreAllocated flags on the stack objects and /// just allocate them normally. - bool UseLocalStackAllocationBlock; - - /// Whether the "realign-stack" option is on. - bool RealignOption; + bool UseLocalStackAllocationBlock = false; /// True if the function dynamically adjusts the stack pointer through some /// opaque mechanism like inline assembly or Win32 EH. - bool HasOpaqueSPAdjustment; + bool HasOpaqueSPAdjustment = false; /// True if the function contains operations which will lower down to /// instructions which manipulate the stack pointer. - bool HasCopyImplyingStackAdjustment; + bool HasCopyImplyingStackAdjustment = false; /// True if the function contains a call to the llvm.vastart intrinsic. - bool HasVAStart; + bool HasVAStart = false; /// True if this is a varargs function that contains a musttail call. - bool HasMustTailInVarArgFunc; + bool HasMustTailInVarArgFunc = false; /// True if this function contains a tail call. If so immutable objects like /// function arguments are no longer so. A tail call *can* override fixed /// stack objects like arguments so we can't treat them as immutable. - bool HasTailCall; + bool HasTailCall = false; /// Not null, if shrink-wrapping found a better place for the prologue. 
- MachineBasicBlock *Save; + MachineBasicBlock *Save = nullptr; /// Not null, if shrink-wrapping found a better place for the epilogue. - MachineBasicBlock *Restore; + MachineBasicBlock *Restore = nullptr; public: - explicit MachineFrameInfo(unsigned StackAlign, bool isStackRealign, - bool RealignOpt) - : StackAlignment(StackAlign), StackRealignable(isStackRealign), - RealignOption(RealignOpt) { - StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0; - HasVarSizedObjects = false; - FrameAddressTaken = false; - ReturnAddressTaken = false; - HasStackMap = false; - HasPatchPoint = false; - AdjustsStack = false; - HasCalls = false; - StackProtectorIdx = -1; - FunctionContextIdx = -1; - MaxCallFrameSize = 0; - CSIValid = false; - LocalFrameSize = 0; - LocalFrameMaxAlign = 0; - UseLocalStackAllocationBlock = false; - HasOpaqueSPAdjustment = false; - HasCopyImplyingStackAdjustment = false; - HasVAStart = false; - HasMustTailInVarArgFunc = false; - Save = nullptr; - Restore = nullptr; - HasTailCall = false; - } + explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable, + bool ForcedRealign) + : StackAlignment(StackAlignment), StackRealignable(StackRealignable), + ForcedRealign(ForcedRealign) {} /// Return true if there are any stack objects in this function. 
bool hasStackObjects() const { return !Objects.empty(); } @@ -450,6 +434,30 @@ public: return Objects[ObjectIdx+NumFixedObjects].SPOffset; } + bool isObjectZExt(int ObjectIdx) const { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + return Objects[ObjectIdx+NumFixedObjects].isZExt; + } + + void setObjectZExt(int ObjectIdx, bool IsZExt) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + Objects[ObjectIdx+NumFixedObjects].isZExt = IsZExt; + } + + bool isObjectSExt(int ObjectIdx) const { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + return Objects[ObjectIdx+NumFixedObjects].isSExt; + } + + void setObjectSExt(int ObjectIdx, bool IsSExt) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + Objects[ObjectIdx+NumFixedObjects].isSExt = IsSExt; + } + /// Set the stack frame offset of the specified object. The /// offset is relative to the stack pointer on entry to the function. 
void setObjectOffset(int ObjectIdx, int64_t SPOffset) { diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index df7c951743c97..4aa9a92e6bee4 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -18,12 +18,15 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTION_H #define LLVM_CODEGEN_MACHINEFUNCTION_H +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/ilist.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Recycler.h" namespace llvm { @@ -50,6 +53,8 @@ struct ilist_traits<MachineBasicBlock> : public ilist_default_traits<MachineBasicBlock> { mutable ilist_half_node<MachineBasicBlock> Sentinel; public: + // FIXME: This downcast is UB. See llvm.org/PR26753. + LLVM_NO_SANITIZE("object-size") MachineBasicBlock *createSentinel() const { return static_cast<MachineBasicBlock*>(&Sentinel); } @@ -85,6 +90,74 @@ struct MachineFunctionInfo { } }; +/// Properties which a MachineFunction may have at a given point in time. +/// Each of these has checking code in the MachineVerifier, and passes can +/// require that a property be set. +class MachineFunctionProperties { + // TODO: Add MachineVerifier checks for AllVRegsAllocated + // TODO: Add a way to print the properties and make more useful error messages + // Possible TODO: Allow targets to extend this (perhaps by allowing the + // constructor to specify the size of the bit vector) + // Possible TODO: Allow requiring the negative (e.g. VRegsAllocated could be + // stated as the negative of "has vregs" + +public: + // The properties are stated in "positive" form; i.e. a pass could require + // that the property hold, but not that it does not hold. 
+ + // Property descriptions: + // IsSSA: True when the machine function is in SSA form and virtual registers + // have a single def. + // TracksLiveness: True when tracking register liveness accurately. + // While this property is set, register liveness information in basic block + // live-in lists and machine instruction operands (e.g. kill flags, implicit + // defs) is accurate. This means it can be used to change the code in ways + // that affect the values in registers, for example by the register + // scavenger. + // When this property is clear, liveness is no longer reliable. + // AllVRegsAllocated: All virtual registers have been allocated; i.e. all + // register operands are physical registers. + enum class Property : unsigned { + IsSSA, + TracksLiveness, + AllVRegsAllocated, + LastProperty, + }; + + bool hasProperty(Property P) const { + return Properties[static_cast<unsigned>(P)]; + } + MachineFunctionProperties &set(Property P) { + Properties.set(static_cast<unsigned>(P)); + return *this; + } + MachineFunctionProperties &clear(Property P) { + Properties.reset(static_cast<unsigned>(P)); + return *this; + } + MachineFunctionProperties &set(const MachineFunctionProperties &MFP) { + Properties |= MFP.Properties; + return *this; + } + MachineFunctionProperties &clear(const MachineFunctionProperties &MFP) { + Properties.reset(MFP.Properties); + return *this; + } + // Returns true if all properties set in V (i.e. required by a pass) are set + // in this. + bool verifyRequiredProperties(const MachineFunctionProperties &V) const { + return !V.Properties.test(Properties); + } + + // Print the MachineFunctionProperties in human-readable form. If OnlySet is + // true, only print the properties that are set. 
+ void print(raw_ostream &ROS, bool OnlySet=false) const; + +private: + BitVector Properties = + BitVector(static_cast<unsigned>(Property::LastProperty)); +}; + class MachineFunction { const Function *Fn; const TargetMachine &Target; @@ -146,10 +219,14 @@ class MachineFunction { /// the attribute itself. /// This is used to limit optimizations which cannot reason /// about the control flow of such functions. - bool ExposesReturnsTwice; + bool ExposesReturnsTwice = false; /// True if the function includes any inline assembly. - bool HasInlineAsm; + bool HasInlineAsm = false; + + /// Current high-level properties of the IR of the function (e.g. is in SSA + /// form or whether registers have been allocated) + MachineFunctionProperties Properties; // Allocation management for pseudo source values. std::unique_ptr<PseudoSourceValueManager> PSVManager; @@ -268,6 +345,10 @@ public: HasInlineAsm = B; } + /// Get the function properties + const MachineFunctionProperties &getProperties() const { return Properties; } + MachineFunctionProperties &getProperties() { return Properties; } + /// getInfo - Keep track of various per-function pieces of information for /// backends that would like to do so. /// @@ -311,7 +392,7 @@ public: /// print - Print out the MachineFunction in a format suitable for debugging /// to the specified stream. /// - void print(raw_ostream &OS, SlotIndexes* = nullptr) const; + void print(raw_ostream &OS, const SlotIndexes* = nullptr) const; /// viewCFG - This function is meant for use from the debugger. You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the @@ -332,9 +413,11 @@ public: /// void dump() const; - /// verify - Run the current MachineFunction through the machine code - /// verifier, useful for debugger use. - void verify(Pass *p = nullptr, const char *Banner = nullptr) const; + /// Run the current MachineFunction through the machine code verifier, useful + /// for debugger use. 
+ /// \returns true if no problems were found. + bool verify(Pass *p = nullptr, const char *Banner = nullptr, + bool AbortOnError = true) const; // Provide accessors for the MachineBasicBlock list... typedef BasicBlockListType::iterator iterator; @@ -420,8 +503,7 @@ public: /// CreateMachineInstr - Allocate a new MachineInstr. Use this instead /// of `new MachineInstr'. /// - MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, - DebugLoc DL, + MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp = false); /// CloneMachineInstr - Create a new MachineInstr which is a copy of the @@ -449,8 +531,8 @@ public: /// MachineMemOperands are owned by the MachineFunction and need not be /// explicitly deallocated. MachineMemOperand *getMachineMemOperand(MachinePointerInfo PtrInfo, - unsigned f, uint64_t s, - unsigned base_alignment, + MachineMemOperand::Flags f, + uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h index 50a1f6e96217d..653d1175d04b4 100644 --- a/include/llvm/CodeGen/MachineFunctionPass.h +++ b/include/llvm/CodeGen/MachineFunctionPass.h @@ -20,16 +20,24 @@ #define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H #include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunction.h" namespace llvm { -class MachineFunction; - /// MachineFunctionPass - This class adapts the FunctionPass interface to /// allow convenient creation of passes that operate on the MachineFunction /// representation. Instead of overriding runOnFunction, subclasses /// override runOnMachineFunction. class MachineFunctionPass : public FunctionPass { +public: + bool doInitialization(Module&) override { + // Cache the properties info at module-init time so we don't have to + // construct them for every function. 
+ RequiredProperties = getRequiredProperties(); + SetProperties = getSetProperties(); + ClearedProperties = getClearedProperties(); + return false; + } protected: explicit MachineFunctionPass(char &ID) : FunctionPass(ID) {} @@ -46,7 +54,21 @@ protected: /// void getAnalysisUsage(AnalysisUsage &AU) const override; + virtual MachineFunctionProperties getRequiredProperties() const { + return MachineFunctionProperties(); + } + virtual MachineFunctionProperties getSetProperties() const { + return MachineFunctionProperties(); + } + virtual MachineFunctionProperties getClearedProperties() const { + return MachineFunctionProperties(); + } + private: + MachineFunctionProperties RequiredProperties; + MachineFunctionProperties SetProperties; + MachineFunctionProperties ClearedProperties; + /// createPrinterPass - Get a machine function printer pass. Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const override; diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 05c9a9e0b0796..8f1cb9b6f6590 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -16,16 +16,13 @@ #ifndef LLVM_CODEGEN_MACHINEINSTR_H #define LLVM_CODEGEN_MACHINEINSTR_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" @@ -34,10 +31,17 @@ namespace llvm { +class StringRef; +template <typename T> class ArrayRef; template <typename T> class SmallVectorImpl; +class DILocalVariable; +class DIExpression; class TargetInstrInfo; class TargetRegisterClass; class TargetRegisterInfo; +#ifdef LLVM_BUILD_GLOBAL_ISEL +class Type; +#endif class 
MachineFunction; class MachineMemOperand; @@ -102,6 +106,13 @@ private: DebugLoc debugLoc; // Source line information. +#ifdef LLVM_BUILD_GLOBAL_ISEL + /// Type of the instruction in case of a generic opcode. + /// \invariant This must be nullptr is getOpcode() is not + /// in the range of generic opcodes. + Type *Ty; +#endif + MachineInstr(const MachineInstr&) = delete; void operator=(const MachineInstr&) = delete; // Use MachineFunction::DeleteMachineInstr() instead. @@ -176,6 +187,11 @@ public: Flags &= ~((uint8_t)Flag); } + /// Set the type of the instruction. + /// \pre getOpcode() is in the range of the generic opcodes. + void setType(Type *Ty); + Type *getType() const; + /// Return true if MI is in a bundle (but not the first MI in a bundle). /// /// A bundle looks like this before it's finalized: @@ -248,17 +264,11 @@ public: /// Return the debug variable referenced by /// this DBG_VALUE instruction. - const DILocalVariable *getDebugVariable() const { - assert(isDebugValue() && "not a DBG_VALUE"); - return cast<DILocalVariable>(getOperand(2).getMetadata()); - } + const DILocalVariable *getDebugVariable() const; /// Return the complex address expression referenced by /// this DBG_VALUE instruction. - const DIExpression *getDebugExpression() const { - assert(isDebugValue() && "not a DBG_VALUE"); - return cast<DIExpression>(getOperand(3).getMetadata()); - } + const DIExpression *getDebugExpression() const; /// Emit an error referring to the source location of this instruction. 
/// This should only be used for inline assembly that is somehow @@ -343,6 +353,14 @@ public: return make_range(operands_begin() + getDesc().getNumDefs(), operands_end()); } + iterator_range<mop_iterator> explicit_uses() { + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_begin() + getNumExplicitOperands() ); + } + iterator_range<const_mop_iterator> explicit_uses() const { + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_begin() + getNumExplicitOperands() ); + } /// Returns the number of the operand iterator \p I points to. unsigned getOperandNo(const_mop_iterator I) const { @@ -508,6 +526,11 @@ public: /// Convergent instructions can not be made control-dependent on any /// additional values. bool isConvergent(QueryType Type = AnyInBundle) const { + if (isInlineAsm()) { + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsConvergent) + return true; + } return hasProperty(MCID::Convergent, Type); } @@ -713,7 +736,7 @@ public: /// Return true if this instruction is identical to (same /// opcode and same operands as) the specified instruction. - bool isIdenticalTo(const MachineInstr *Other, + bool isIdenticalTo(const MachineInstr &Other, MICheckType Check = CheckDefs) const; /// Unlink 'this' from the containing basic block, and return it without @@ -899,6 +922,10 @@ public: return findRegisterDefOperandIdx(Reg, true, false, TRI) != -1; } + /// Returns true if the MachineInstr has an implicit-use operand of exactly + /// the given register (not considering sub/super-registers). + bool hasRegisterImplicitUseOperand(unsigned Reg) const; + /// Returns the operand index that is a use of the specific register or -1 /// if it is not found. It further tightens the search criteria to a use /// that kills the register if isKill is true. @@ -1054,8 +1081,8 @@ public: const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); - /// Clear all kill flags affecting Reg. 
If RegInfo is - /// provided, this includes super-register kills. + /// Clear all kill flags affecting Reg. If RegInfo is provided, this includes + /// all aliasing registers. void clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo); /// We have determined MI defined a register without a use. @@ -1125,7 +1152,7 @@ public: /// Copy implicit register operands from specified /// instruction to this instruction. - void copyImplicitOps(MachineFunction &MF, const MachineInstr *MI); + void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI); // // Debugging support @@ -1168,7 +1195,7 @@ public: assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); } - /// Erase an operand from an instruction, leaving it with one + /// Erase an operand from an instruction, leaving it with one /// fewer operand than it started with. void RemoveOperand(unsigned i); @@ -1268,7 +1295,7 @@ struct MachineInstrExpressionTrait : DenseMapInfo<MachineInstr*> { if (RHS == getEmptyKey() || RHS == getTombstoneKey() || LHS == getEmptyKey() || LHS == getTombstoneKey()) return LHS == RHS; - return LHS->isIdenticalTo(RHS, MachineInstr::IgnoreVRegDefs); + return LHS->isIdenticalTo(*RHS, MachineInstr::IgnoreVRegDefs); } }; diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 8fe9b280d5d2d..37b67aa0cf5c0 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -10,7 +10,9 @@ // This file exposes a function named BuildMI, which is useful for dramatically // simplifying how MachineInstr's are created. 
It allows use of code like this: // -// M = BuildMI(X86::ADDrr8, 2).addReg(argVal1).addReg(argVal2); +// M = BuildMI(MBB, MI, DL, TII.get(X86::ADD8rr), Dst) +// .addReg(argVal1) +// .addReg(argVal2); // //===----------------------------------------------------------------------===// @@ -51,6 +53,8 @@ public: /// Create a MachineInstrBuilder for manipulating an existing instruction. /// F must be the machine function that was used to allocate I. MachineInstrBuilder(MachineFunction &F, MachineInstr *I) : MF(&F), MI(I) {} + MachineInstrBuilder(MachineFunction &F, MachineBasicBlock::iterator I) + : MF(&F), MI(&*I) {} /// Allow automatic conversion to the machine instruction we are working on. operator MachineInstr*() const { return MI; } @@ -228,25 +232,22 @@ public: /// Copy all the implicit operands from OtherMI onto this one. const MachineInstrBuilder & - copyImplicitOps(const MachineInstr *OtherMI) const { + copyImplicitOps(const MachineInstr &OtherMI) const { MI->copyImplicitOps(*MF, OtherMI); return *this; } }; /// Builder interface. Specify how to create the initial instruction itself. -inline MachineInstrBuilder BuildMI(MachineFunction &MF, - DebugLoc DL, +inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)); } /// This version of the builder sets up the first operand as a /// destination virtual register. -inline MachineInstrBuilder BuildMI(MachineFunction &MF, - DebugLoc DL, - const MCInstrDesc &MCID, - unsigned DestReg) { +inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, + const MCInstrDesc &MCID, unsigned DestReg) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)) .addReg(DestReg, RegState::Define); } @@ -256,8 +257,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, /// operand as a destination virtual register. 
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, - DebugLoc DL, - const MCInstrDesc &MCID, + const DebugLoc &DL, const MCInstrDesc &MCID, unsigned DestReg) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); @@ -265,10 +265,15 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return MachineInstrBuilder(MF, MI).addReg(DestReg, RegState::Define); } +/// This version of the builder inserts the newly-built instruction before +/// the given position in the given MachineBasicBlock, and sets up the first +/// operand as a destination virtual register. +/// +/// If \c I is inside a bundle, then the newly inserted \a MachineInstr is +/// added to the same bundle. inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, - DebugLoc DL, - const MCInstrDesc &MCID, + const DebugLoc &DL, const MCInstrDesc &MCID, unsigned DestReg) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); @@ -276,18 +281,20 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return MachineInstrBuilder(MF, MI).addReg(DestReg, RegState::Define); } -inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, - MachineInstr *I, - DebugLoc DL, - const MCInstrDesc &MCID, +inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, + const DebugLoc &DL, const MCInstrDesc &MCID, unsigned DestReg) { - if (I->isInsideBundle()) { - MachineBasicBlock::instr_iterator MII(I); - return BuildMI(BB, MII, DL, MCID, DestReg); - } + // Calling the overload for instr_iterator is always correct. However, the + // definition is not available in headers, so inline the check. 
+ if (I.isInsideBundle()) + return BuildMI(BB, MachineBasicBlock::instr_iterator(I), DL, MCID, DestReg); + return BuildMI(BB, MachineBasicBlock::iterator(I), DL, MCID, DestReg); +} - MachineBasicBlock::iterator MII = I; - return BuildMI(BB, MII, DL, MCID, DestReg); +inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I, + const DebugLoc &DL, const MCInstrDesc &MCID, + unsigned DestReg) { + return BuildMI(BB, *I, DL, MCID, DestReg); } /// This version of the builder inserts the newly-built instruction before the @@ -295,7 +302,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, /// destination register. inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, - DebugLoc DL, + const DebugLoc &DL, const MCInstrDesc &MCID) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); @@ -305,7 +312,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, - DebugLoc DL, + const DebugLoc &DL, const MCInstrDesc &MCID) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); @@ -313,23 +320,25 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return MachineInstrBuilder(MF, MI); } -inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, - MachineInstr *I, - DebugLoc DL, +inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, + const DebugLoc &DL, const MCInstrDesc &MCID) { - if (I->isInsideBundle()) { - MachineBasicBlock::instr_iterator MII(I); - return BuildMI(BB, MII, DL, MCID); - } + // Calling the overload for instr_iterator is always correct. However, the + // definition is not available in headers, so inline the check. 
+ if (I.isInsideBundle()) + return BuildMI(BB, MachineBasicBlock::instr_iterator(I), DL, MCID); + return BuildMI(BB, MachineBasicBlock::iterator(I), DL, MCID); +} - MachineBasicBlock::iterator MII = I; - return BuildMI(BB, MII, DL, MCID); +inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I, + const DebugLoc &DL, + const MCInstrDesc &MCID) { + return BuildMI(BB, *I, DL, MCID); } /// This version of the builder inserts the newly-built instruction at the end /// of the given MachineBasicBlock, and does NOT take a destination register. -inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, - DebugLoc DL, +inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, const MCInstrDesc &MCID) { return BuildMI(*BB, BB->end(), DL, MCID); } @@ -337,10 +346,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, /// This version of the builder inserts the newly-built instruction at the /// end of the given MachineBasicBlock, and sets up the first operand as a /// destination virtual register. -inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, - DebugLoc DL, - const MCInstrDesc &MCID, - unsigned DestReg) { +inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, + const MCInstrDesc &MCID, unsigned DestReg) { return BuildMI(*BB, BB->end(), DL, MCID, DestReg); } @@ -348,47 +355,19 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, /// for either a value in a register or a register-indirect+offset /// address. The convention is that a DBG_VALUE is indirect iff the /// second operand is an immediate. 
-inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, - const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, unsigned Offset, - const MDNode *Variable, const MDNode *Expr) { - assert(isa<DILocalVariable>(Variable) && "not a variable"); - assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); - assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - if (IsIndirect) - return BuildMI(MF, DL, MCID) - .addReg(Reg, RegState::Debug) - .addImm(Offset) - .addMetadata(Variable) - .addMetadata(Expr); - else { - assert(Offset == 0 && "A direct address cannot have an offset."); - return BuildMI(MF, DL, MCID) - .addReg(Reg, RegState::Debug) - .addReg(0U, RegState::Debug) - .addMetadata(Variable) - .addMetadata(Expr); - } -} +MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, + const MCInstrDesc &MCID, bool IsIndirect, + unsigned Reg, unsigned Offset, + const MDNode *Variable, const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic /// for either a value in a register or a register-indirect+offset /// address and inserts it at position I. 
-inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, - MachineBasicBlock::iterator I, DebugLoc DL, - const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, unsigned Offset, - const MDNode *Variable, const MDNode *Expr) { - assert(isa<DILocalVariable>(Variable) && "not a variable"); - assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); - MachineFunction &MF = *BB.getParent(); - MachineInstr *MI = - BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, Variable, Expr); - BB.insert(I, MI); - return MachineInstrBuilder(MF, MI); -} - +MachineInstrBuilder BuildMI(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + const MCInstrDesc &MCID, bool IsIndirect, + unsigned Reg, unsigned Offset, + const MDNode *Variable, const MDNode *Expr); inline unsigned getDefRegState(bool B) { return B ? RegState::Define : 0; @@ -412,6 +391,17 @@ inline unsigned getDebugRegState(bool B) { return B ? RegState::Debug : 0; } +/// Get all register state flags from machine operand \p RegOp. +inline unsigned getRegState(const MachineOperand &RegOp) { + assert(RegOp.isReg() && "Not a register operand"); + return getDefRegState(RegOp.isDef()) | + getImplRegState(RegOp.isImplicit()) | + getKillRegState(RegOp.isKill()) | + getDeadRegState(RegOp.isDead()) | + getUndefRegState(RegOp.isUndef()) | + getInternalReadRegState(RegOp.isInternalRead()) | + getDebugRegState(RegOp.isDebug()); +} /// Helper class for constructing bundles of MachineInstrs. /// @@ -426,28 +416,26 @@ class MIBundleBuilder { public: /// Create an MIBundleBuilder that inserts instructions into a new bundle in /// BB above the bundle or instruction at Pos. - MIBundleBuilder(MachineBasicBlock &BB, - MachineBasicBlock::iterator Pos) - : MBB(BB), Begin(Pos.getInstrIterator()), End(Begin) {} + MIBundleBuilder(MachineBasicBlock &BB, MachineBasicBlock::iterator Pos) + : MBB(BB), Begin(Pos.getInstrIterator()), End(Begin) {} /// Create a bundle from the sequence of instructions between B and E. 
- MIBundleBuilder(MachineBasicBlock &BB, - MachineBasicBlock::iterator B, + MIBundleBuilder(MachineBasicBlock &BB, MachineBasicBlock::iterator B, MachineBasicBlock::iterator E) - : MBB(BB), Begin(B.getInstrIterator()), End(E.getInstrIterator()) { + : MBB(BB), Begin(B.getInstrIterator()), End(E.getInstrIterator()) { assert(B != E && "No instructions to bundle"); ++B; while (B != E) { - MachineInstr *MI = B; + MachineInstr &MI = *B; ++B; - MI->bundleWithPred(); + MI.bundleWithPred(); } } /// Create an MIBundleBuilder representing an existing instruction or bundle /// that has MI as its head. explicit MIBundleBuilder(MachineInstr *MI) - : MBB(*MI->getParent()), Begin(MI), End(getBundleEnd(MI)) {} + : MBB(*MI->getParent()), Begin(MI), End(getBundleEnd(*MI)) {} /// Return a reference to the basic block containing this bundle. MachineBasicBlock &getMBB() const { return MBB; } diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 4e88606c05a7e..c0033a5148cf5 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -43,23 +43,22 @@ bool finalizeBundles(MachineFunction &MF); /// getBundleStart - Returns the first instruction in the bundle containing MI. /// -inline MachineInstr *getBundleStart(MachineInstr *MI) { +inline MachineInstr &getBundleStart(MachineInstr &MI) { MachineBasicBlock::instr_iterator I(MI); while (I->isBundledWithPred()) --I; - return &*I; + return *I; } -inline const MachineInstr *getBundleStart(const MachineInstr *MI) { +inline const MachineInstr &getBundleStart(const MachineInstr &MI) { MachineBasicBlock::const_instr_iterator I(MI); while (I->isBundledWithPred()) --I; - return &*I; + return *I; } /// Return an iterator pointing beyond the bundle containing MI. 
-inline MachineBasicBlock::instr_iterator -getBundleEnd(MachineInstr *MI) { +inline MachineBasicBlock::instr_iterator getBundleEnd(MachineInstr &MI) { MachineBasicBlock::instr_iterator I(MI); while (I->isBundledWithSucc()) ++I; @@ -68,7 +67,7 @@ getBundleEnd(MachineInstr *MI) { /// Return an iterator pointing beyond the bundle containing MI. inline MachineBasicBlock::const_instr_iterator -getBundleEnd(const MachineInstr *MI) { +getBundleEnd(const MachineInstr &MI) { MachineBasicBlock::const_instr_iterator I(MI); while (I->isBundledWithSucc()) ++I; @@ -114,12 +113,12 @@ protected: /// @param MI The instruction to examine. /// @param WholeBundle When true, visit all operands on the entire bundle. /// - explicit MachineOperandIteratorBase(MachineInstr *MI, bool WholeBundle) { + explicit MachineOperandIteratorBase(MachineInstr &MI, bool WholeBundle) { if (WholeBundle) { - InstrI = getBundleStart(MI)->getIterator(); - InstrE = MI->getParent()->instr_end(); + InstrI = getBundleStart(MI).getIterator(); + InstrE = MI.getParent()->instr_end(); } else { - InstrI = InstrE = MI->getIterator(); + InstrI = InstrE = MI.getIterator(); ++InstrE; } OpI = InstrI->operands_begin(); @@ -184,10 +183,16 @@ public: /// Reg or a super-register is read. The full register is read. bool FullyRead; - /// Reg is FullyDefined and all defs of reg or an overlapping register are - /// dead. + /// Either: + /// - Reg is FullyDefined and all defs of reg or an overlapping + /// register are dead, or + /// - Reg is completely dead because "defined" by a clobber. bool DeadDef; + /// Reg is Defined and all defs of reg or an overlapping register are + /// dead. + bool PartialDeadDef; + /// There is a use operand of reg or a super-register with kill flag set. 
bool Killed; }; @@ -216,7 +221,7 @@ public: /// class MIOperands : public MachineOperandIteratorBase { public: - MIOperands(MachineInstr *MI) : MachineOperandIteratorBase(MI, false) {} + MIOperands(MachineInstr &MI) : MachineOperandIteratorBase(MI, false) {} MachineOperand &operator* () const { return deref(); } MachineOperand *operator->() const { return &deref(); } }; @@ -225,8 +230,8 @@ public: /// class ConstMIOperands : public MachineOperandIteratorBase { public: - ConstMIOperands(const MachineInstr *MI) - : MachineOperandIteratorBase(const_cast<MachineInstr*>(MI), false) {} + ConstMIOperands(const MachineInstr &MI) + : MachineOperandIteratorBase(const_cast<MachineInstr &>(MI), false) {} const MachineOperand &operator* () const { return deref(); } const MachineOperand *operator->() const { return &deref(); } }; @@ -236,7 +241,7 @@ public: /// class MIBundleOperands : public MachineOperandIteratorBase { public: - MIBundleOperands(MachineInstr *MI) : MachineOperandIteratorBase(MI, true) {} + MIBundleOperands(MachineInstr &MI) : MachineOperandIteratorBase(MI, true) {} MachineOperand &operator* () const { return deref(); } MachineOperand *operator->() const { return &deref(); } }; @@ -246,8 +251,8 @@ public: /// class ConstMIBundleOperands : public MachineOperandIteratorBase { public: - ConstMIBundleOperands(const MachineInstr *MI) - : MachineOperandIteratorBase(const_cast<MachineInstr*>(MI), true) {} + ConstMIBundleOperands(const MachineInstr &MI) + : MachineOperandIteratorBase(const_cast<MachineInstr &>(MI), true) {} const MachineOperand &operator* () const { return deref(); } const MachineOperand *operator->() const { return &deref(); } }; diff --git a/include/llvm/CodeGen/MachineInstrBundleIterator.h b/include/llvm/CodeGen/MachineInstrBundleIterator.h new file mode 100644 index 0000000000000..45a9a188f90ed --- /dev/null +++ b/include/llvm/CodeGen/MachineInstrBundleIterator.h @@ -0,0 +1,92 @@ +//===- llvm/CodeGen/MachineInstrBundleIterator.h ----------------*- 
C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines an iterator class that bundles MachineInstr. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINEINSTRBUNDLEITERATOR_H +#define LLVM_CODEGEN_MACHINEINSTRBUNDLEITERATOR_H + +#include "llvm/ADT/ilist.h" +#include <iterator> + +namespace llvm { + +/// MachineBasicBlock iterator that automatically skips over MIs that are +/// inside bundles (i.e. walk top level MIs only). +template <typename Ty> +class MachineInstrBundleIterator + : public std::iterator<std::bidirectional_iterator_tag, Ty, ptrdiff_t> { + typedef ilist_iterator<Ty> instr_iterator; + instr_iterator MII; + +public: + MachineInstrBundleIterator(instr_iterator MI) : MII(MI) {} + + MachineInstrBundleIterator(Ty &MI) : MII(MI) { + assert(!MI.isBundledWithPred() && "It's not legal to initialize " + "MachineInstrBundleIterator with a " + "bundled MI"); + } + MachineInstrBundleIterator(Ty *MI) : MII(MI) { + // FIXME: This conversion should be explicit. + assert((!MI || !MI->isBundledWithPred()) && "It's not legal to initialize " + "MachineInstrBundleIterator " + "with a bundled MI"); + } + // Template allows conversion from const to nonconst. + template <class OtherTy> + MachineInstrBundleIterator(const MachineInstrBundleIterator<OtherTy> &I) + : MII(I.getInstrIterator()) {} + MachineInstrBundleIterator() : MII(nullptr) {} + + Ty &operator*() const { return *MII; } + Ty *operator->() const { return &operator*(); } + + // FIXME: This conversion should be explicit. 
+ operator Ty *() const { return MII.getNodePtrUnchecked(); } + + bool operator==(const MachineInstrBundleIterator &X) const { + return MII == X.MII; + } + bool operator!=(const MachineInstrBundleIterator &X) const { + return !operator==(X); + } + + // Increment and decrement operators... + MachineInstrBundleIterator &operator--() { + do + --MII; + while (MII->isBundledWithPred()); + return *this; + } + MachineInstrBundleIterator &operator++() { + while (MII->isBundledWithSucc()) + ++MII; + ++MII; + return *this; + } + MachineInstrBundleIterator operator--(int) { + MachineInstrBundleIterator Temp = *this; + --*this; + return Temp; + } + MachineInstrBundleIterator operator++(int) { + MachineInstrBundleIterator Temp = *this; + ++*this; + return Temp; + } + + instr_iterator getInstrIterator() const { return MII; } +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index 4868b7363f824..224a2a1aa59f5 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -44,14 +44,14 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> { public: MachineLoop(); - /// getTopBlock - Return the "top" block in the loop, which is the first - /// block in the linear layout, ignoring any parts of the loop not - /// contiguous with the part the contains the header. + /// Return the "top" block in the loop, which is the first block in the linear + /// layout, ignoring any parts of the loop not contiguous with the part that + /// contains the header. MachineBasicBlock *getTopBlock(); - /// getBottomBlock - Return the "bottom" block in the loop, which is the last - /// block in the linear layout, ignoring any parts of the loop not - /// contiguous with the part the contains the header. 
+ /// Return the "bottom" block in the loop, which is the last block in the + /// linear layout, ignoring any parts of the loop not contiguous with the part + /// that contains the header. MachineBasicBlock *getBottomBlock(); void dump() const; @@ -81,72 +81,64 @@ public: LoopInfoBase<MachineBasicBlock, MachineLoop>& getBase() { return LI; } - /// iterator/begin/end - The interface to the top-level loops in the current - /// function. - /// + /// The iterator interface to the top-level loops in the current function. typedef LoopInfoBase<MachineBasicBlock, MachineLoop>::iterator iterator; inline iterator begin() const { return LI.begin(); } inline iterator end() const { return LI.end(); } bool empty() const { return LI.empty(); } - /// getLoopFor - Return the inner most loop that BB lives in. If a basic - /// block is in no loop (for example the entry node), null is returned. - /// + /// Return the innermost loop that BB lives in. If a basic block is in no loop + /// (for example the entry node), null is returned. inline MachineLoop *getLoopFor(const MachineBasicBlock *BB) const { return LI.getLoopFor(BB); } - /// operator[] - same as getLoopFor... - /// + /// Same as getLoopFor. inline const MachineLoop *operator[](const MachineBasicBlock *BB) const { return LI.getLoopFor(BB); } - /// getLoopDepth - Return the loop nesting level of the specified block... - /// + /// Return the loop nesting level of the specified block. inline unsigned getLoopDepth(const MachineBasicBlock *BB) const { return LI.getLoopDepth(BB); } - // isLoopHeader - True if the block is a loop header node + /// True if the block is a loop header node. inline bool isLoopHeader(const MachineBasicBlock *BB) const { return LI.isLoopHeader(BB); } - /// runOnFunction - Calculate the natural loop information. - /// + /// Calculate the natural loop information. 
bool runOnMachineFunction(MachineFunction &F) override; void releaseMemory() override { LI.releaseMemory(); } void getAnalysisUsage(AnalysisUsage &AU) const override; - /// removeLoop - This removes the specified top-level loop from this loop info - /// object. The loop is not deleted, as it will presumably be inserted into - /// another loop. + /// This removes the specified top-level loop from this loop info object. The + /// loop is not deleted, as it will presumably be inserted into another loop. inline MachineLoop *removeLoop(iterator I) { return LI.removeLoop(I); } - /// changeLoopFor - Change the top-level loop that contains BB to the - /// specified loop. This should be used by transformations that restructure - /// the loop hierarchy tree. + /// Change the top-level loop that contains BB to the specified loop. This + /// should be used by transformations that restructure the loop hierarchy + /// tree. inline void changeLoopFor(MachineBasicBlock *BB, MachineLoop *L) { LI.changeLoopFor(BB, L); } - /// changeTopLevelLoop - Replace the specified loop in the top-level loops - /// list with the indicated loop. + /// Replace the specified loop in the top-level loops list with the indicated + /// loop. inline void changeTopLevelLoop(MachineLoop *OldLoop, MachineLoop *NewLoop) { LI.changeTopLevelLoop(OldLoop, NewLoop); } - /// addTopLevelLoop - This adds the specified loop to the collection of - /// top-level loops. + /// This adds the specified loop to the collection of top-level loops. inline void addTopLevelLoop(MachineLoop *New) { LI.addTopLevelLoop(New); } - /// removeBlock - This method completely removes BB from all data structures, - /// including all of the Loop objects it is nested in and our mapping from + /// This method completely removes BB from all data structures, including all + /// of the Loop objects it is nested in and our mapping from /// MachineBasicBlocks to loops. 
void removeBlock(MachineBasicBlock *BB) { LI.removeBlock(BB); diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index 1ca0d90465a4a..5fa7058733b37 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -16,6 +16,7 @@ #ifndef LLVM_CODEGEN_MACHINEMEMOPERAND_H #define LLVM_CODEGEN_MACHINEMEMOPERAND_H +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Metadata.h" @@ -30,12 +31,11 @@ class raw_ostream; class MachineFunction; class ModuleSlotTracker; -/// MachinePointerInfo - This class contains a discriminated union of -/// information about pointers in memory operands, relating them back to LLVM IR -/// or to virtual locations (such as frame indices) that are exposed during -/// codegen. +/// This class contains a discriminated union of information about pointers in +/// memory operands, relating them back to LLVM IR or to virtual locations (such +/// as frame indices) that are exposed during codegen. struct MachinePointerInfo { - /// V - This is the IR pointer value for the access, or it is null if unknown. + /// This is the IR pointer value for the access, or it is null if unknown. /// If this is null, then the access is to a pointer in the default address /// space. PointerUnion<const Value *, const PseudoSourceValue *> V; @@ -57,34 +57,30 @@ struct MachinePointerInfo { return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O); } - /// getAddrSpace - Return the LLVM IR address space number that this pointer - /// points into. + /// Return the LLVM IR address space number that this pointer points into. unsigned getAddrSpace() const; - /// getConstantPool - Return a MachinePointerInfo record that refers to the - /// constant pool. + /// Return a MachinePointerInfo record that refers to the constant pool. 
static MachinePointerInfo getConstantPool(MachineFunction &MF); - /// getFixedStack - Return a MachinePointerInfo record that refers to the - /// the specified FrameIndex. + /// Return a MachinePointerInfo record that refers to the specified + /// FrameIndex. static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset = 0); - /// getJumpTable - Return a MachinePointerInfo record that refers to a - /// jump table entry. + /// Return a MachinePointerInfo record that refers to a jump table entry. static MachinePointerInfo getJumpTable(MachineFunction &MF); - /// getGOT - Return a MachinePointerInfo record that refers to a - /// GOT entry. + /// Return a MachinePointerInfo record that refers to a GOT entry. static MachinePointerInfo getGOT(MachineFunction &MF); - /// getStack - stack pointer relative access. + /// Stack pointer relative access. static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset); }; //===----------------------------------------------------------------------===// -/// MachineMemOperand - A description of a memory reference used in the backend. +/// A description of a memory reference used in the backend. /// Instead of holding a StoreInst or LoadInst, this class holds the address /// Value of the reference along with a byte size and offset. This allows it /// to describe lowered loads and stores. Also, the special PseudoSourceValue @@ -92,43 +88,50 @@ struct MachinePointerInfo { /// that aren't explicit in the regular LLVM IR. /// class MachineMemOperand { - MachinePointerInfo PtrInfo; - uint64_t Size; - unsigned Flags; - AAMDNodes AAInfo; - const MDNode *Ranges; - public: /// Flags values. These may be or'd together. - enum MemOperandFlags { + enum Flags : uint16_t { + // No flags set. + MONone = 0, /// The memory access reads data. - MOLoad = 1, + MOLoad = 1u << 0, /// The memory access writes data. - MOStore = 2, + MOStore = 1u << 1, /// The memory access is volatile. 
- MOVolatile = 4, + MOVolatile = 1u << 2, /// The memory access is non-temporal. - MONonTemporal = 8, + MONonTemporal = 1u << 3, /// The memory access is invariant. - MOInvariant = 16, - // Target hints allow target passes to annotate memory operations. - MOTargetStartBit = 5, - MOTargetNumBits = 3, - // This is the number of bits we need to represent flags. - MOMaxBits = 8 + MOInvariant = 1u << 4, + + // Reserved for use by target-specific passes. + MOTargetFlag1 = 1u << 5, + MOTargetFlag2 = 1u << 6, + MOTargetFlag3 = 1u << 7, + + LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ MOTargetFlag3) }; - /// MachineMemOperand - Construct an MachineMemOperand object with the - /// specified PtrInfo, flags, size, and base alignment. - MachineMemOperand(MachinePointerInfo PtrInfo, unsigned flags, uint64_t s, +private: + MachinePointerInfo PtrInfo; + uint64_t Size; + Flags FlagVals; + uint16_t BaseAlignLog2; // log_2(base_alignment) + 1 + AAMDNodes AAInfo; + const MDNode *Ranges; + +public: + /// Construct a MachineMemOperand object with the specified PtrInfo, flags, + /// size, and base alignment. + MachineMemOperand(MachinePointerInfo PtrInfo, Flags flags, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); const MachinePointerInfo &getPointerInfo() const { return PtrInfo; } - /// getValue - Return the base address of the memory access. This may either - /// be a normal LLVM IR Value, or one of the special values used in CodeGen. + /// Return the base address of the memory access. This may either be a normal + /// LLVM IR Value, or one of the special values used in CodeGen. 
/// Special values are those obtained via /// PseudoSourceValue::getFixedStack(int), PseudoSourceValue::getStack, and /// other PseudoSourceValue member functions which return objects which stand @@ -142,59 +145,58 @@ public: const void *getOpaqueValue() const { return PtrInfo.V.getOpaqueValue(); } - /// getFlags - Return the raw flags of the source value, \see MemOperandFlags. - unsigned int getFlags() const { return Flags & ((1 << MOMaxBits) - 1); } + /// Return the raw flags of the source value, \see Flags. + Flags getFlags() const { return FlagVals; } /// Bitwise OR the current flags with the given flags. - void setFlags(unsigned f) { Flags |= (f & ((1 << MOMaxBits) - 1)); } + void setFlags(Flags f) { FlagVals |= f; } - /// getOffset - For normal values, this is a byte offset added to the base - /// address. For PseudoSourceValue::FPRel values, this is the FrameIndex - /// number. + /// For normal values, this is a byte offset added to the base address. + /// For PseudoSourceValue::FPRel values, this is the FrameIndex number. int64_t getOffset() const { return PtrInfo.Offset; } unsigned getAddrSpace() const { return PtrInfo.getAddrSpace(); } - /// getSize - Return the size in bytes of the memory reference. + /// Return the size in bytes of the memory reference. uint64_t getSize() const { return Size; } - /// getAlignment - Return the minimum known alignment in bytes of the - /// actual memory reference. + /// Return the minimum known alignment in bytes of the actual memory + /// reference. uint64_t getAlignment() const; - /// getBaseAlignment - Return the minimum known alignment in bytes of the - /// base address, without the offset. - uint64_t getBaseAlignment() const { return (1u << (Flags >> MOMaxBits)) >> 1; } + /// Return the minimum known alignment in bytes of the base address, without + /// the offset. + uint64_t getBaseAlignment() const { return (1u << BaseAlignLog2) >> 1; } - /// getAAInfo - Return the AA tags for the memory reference. 
+ /// Return the AA tags for the memory reference. AAMDNodes getAAInfo() const { return AAInfo; } - /// getRanges - Return the range tag for the memory reference. + /// Return the range tag for the memory reference. const MDNode *getRanges() const { return Ranges; } - bool isLoad() const { return Flags & MOLoad; } - bool isStore() const { return Flags & MOStore; } - bool isVolatile() const { return Flags & MOVolatile; } - bool isNonTemporal() const { return Flags & MONonTemporal; } - bool isInvariant() const { return Flags & MOInvariant; } + bool isLoad() const { return FlagVals & MOLoad; } + bool isStore() const { return FlagVals & MOStore; } + bool isVolatile() const { return FlagVals & MOVolatile; } + bool isNonTemporal() const { return FlagVals & MONonTemporal; } + bool isInvariant() const { return FlagVals & MOInvariant; } - /// isUnordered - Returns true if this memory operation doesn't have any - /// ordering constraints other than normal aliasing. Volatile and atomic - /// memory operations can't be reordered. + /// Returns true if this memory operation doesn't have any ordering + /// constraints other than normal aliasing. Volatile and atomic memory + /// operations can't be reordered. /// /// Currently, we don't model the difference between volatile and atomic /// operations. They should retain their ordering relative to all memory /// operations. bool isUnordered() const { return !isVolatile(); } - /// refineAlignment - Update this MachineMemOperand to reflect the alignment - /// of MMO, if it has a greater alignment. This must only be used when the - /// new alignment applies to all users of this MachineMemOperand. + /// Update this MachineMemOperand to reflect the alignment of MMO, if it has a + /// greater alignment. This must only be used when the new alignment applies + /// to all users of this MachineMemOperand. void refineAlignment(const MachineMemOperand *MMO); - /// setValue - Change the SourceValue for this MachineMemOperand. 
This - /// should only be used when an object is being relocated and all references - /// to it are being updated. + /// Change the SourceValue for this MachineMemOperand. This should only be + /// used when an object is being relocated and all references to it are being + /// updated. void setValue(const Value *NewSV) { PtrInfo.V = NewSV; } void setValue(const PseudoSourceValue *NewSV) { PtrInfo.V = NewSV; } void setOffset(int64_t NewOffset) { PtrInfo.Offset = NewOffset; } diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h index e7472145e71f5..f9fa6999073fc 100644 --- a/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -23,44 +23,35 @@ class MCSymbol; /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation /// for MachO targets. class MachineModuleInfoMachO : public MachineModuleInfoImpl { - /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub", - /// the value is something like "_foo". - DenseMap<MCSymbol *, StubValueTy> FnStubs; - /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like /// "Lfoo$non_lazy_ptr", the value is something like "_foo". The extra bit /// is true if this GV is external. DenseMap<MCSymbol *, StubValueTy> GVStubs; - /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like - /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs - /// these are for things with hidden visibility. The extra bit is true if - /// this GV is external. - DenseMap<MCSymbol *, StubValueTy> HiddenGVStubs; + /// ThreadLocalGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something + /// like "Lfoo$non_lazy_ptr", the value is something like "_foo". The extra + /// bit is true if this GV is external. + DenseMap<MCSymbol *, StubValueTy> ThreadLocalGVStubs; virtual void anchor(); // Out of line virtual method. 
public: MachineModuleInfoMachO(const MachineModuleInfo &) {} - StubValueTy &getFnStubEntry(MCSymbol *Sym) { - assert(Sym && "Key cannot be null"); - return FnStubs[Sym]; - } - StubValueTy &getGVStubEntry(MCSymbol *Sym) { assert(Sym && "Key cannot be null"); return GVStubs[Sym]; } - StubValueTy &getHiddenGVStubEntry(MCSymbol *Sym) { + StubValueTy &getThreadLocalGVStubEntry(MCSymbol *Sym) { assert(Sym && "Key cannot be null"); - return HiddenGVStubs[Sym]; + return ThreadLocalGVStubs[Sym]; } /// Accessor methods to return the set of stubs in sorted order. - SymbolListTy GetFnStubList() { return getSortedStubs(FnStubs); } SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); } - SymbolListTy GetHiddenGVStubList() { return getSortedStubs(HiddenGVStubs); } + SymbolListTy GetThreadLocalGVStubList() { + return getSortedStubs(ThreadLocalGVStubs); + } }; /// MachineModuleInfoELF - This is a MachineModuleInfoImpl implementation diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index c43e47c36d060..ee0a9cf11e6ab 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -534,6 +534,15 @@ public: Contents.MBB = MBB; } + /// Sets value of register mask operand referencing Mask. The + /// operand does not take ownership of the memory referenced by Mask, it must + /// remain valid for the lifetime of the operand. See CreateRegMask(). + /// Any physreg with a 0 bit in the mask is clobbered by the instruction. + void setRegMask(const uint32_t *RegMaskPtr) { + assert(isRegMask() && "Wrong MachineOperand mutator"); + Contents.RegMask = RegMaskPtr; + } + //===--------------------------------------------------------------------===// // Other methods. 
//===--------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 04191bc1b74ff..07d2d016f274b 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -16,7 +16,10 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/iterator_range.h" +// PointerUnion needs to have access to the full RegisterBank type. +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -26,6 +29,10 @@ namespace llvm { class PSetIterator; +/// Convenient type to represent either a register class or a register bank. +typedef PointerUnion<const TargetRegisterClass *, const RegisterBank *> + RegClassOrRegBank; + /// MachineRegisterInfo - Keep track of information for virtual and physical /// registers, including vreg register classes, use/def chains for registers, /// etc. @@ -40,18 +47,9 @@ public: }; private: - const MachineFunction *MF; + MachineFunction *MF; Delegate *TheDelegate; - /// IsSSA - True when the machine function is in SSA form and virtual - /// registers have a single def. - bool IsSSA; - - /// TracksLiveness - True while register liveness is being tracked accurately. - /// Basic block live-in lists, kill flags, and implicit defs may not be - /// accurate when after this flag is cleared. - bool TracksLiveness; - /// True if subregister liveness is tracked. bool TracksSubRegLiveness; @@ -59,8 +57,9 @@ private: /// /// Each element in this list contains the register class of the vreg and the /// start of the use/def list for the register. 
- IndexedMap<std::pair<const TargetRegisterClass*, MachineOperand*>, - VirtReg2IndexFunctor> VRegInfo; + IndexedMap<std::pair<RegClassOrRegBank, MachineOperand *>, + VirtReg2IndexFunctor> + VRegInfo; /// RegAllocHints - This vector records register allocation hints for virtual /// registers. For each virtual register, it keeps a register and hint type @@ -105,6 +104,18 @@ private: /// started. BitVector ReservedRegs; + typedef DenseMap<unsigned, unsigned> VRegToSizeMap; + /// Map generic virtual registers to their actual size. + mutable std::unique_ptr<VRegToSizeMap> VRegToSize; + + /// Accessor for VRegToSize. This accessor should only be used + /// by global-isel related work. + VRegToSizeMap &getVRegToSize() const { + if (!VRegToSize) + VRegToSize.reset(new VRegToSizeMap); + return *VRegToSize.get(); + } + /// Keep track of the physical registers that are live in to the function. /// Live in values are typically arguments in registers. LiveIn values are /// allowed to have virtual registers associated with them, stored in the @@ -114,7 +125,7 @@ private: MachineRegisterInfo(const MachineRegisterInfo&) = delete; void operator=(const MachineRegisterInfo&) = delete; public: - explicit MachineRegisterInfo(const MachineFunction *MF); + explicit MachineRegisterInfo(MachineFunction *MF); const TargetRegisterInfo *getTargetRegisterInfo() const { return MF->getSubtarget().getRegisterInfo(); @@ -148,27 +159,32 @@ public: // The TwoAddressInstructionPass and PHIElimination passes take the machine // function out of SSA form when they introduce multiple defs per virtual // register. - bool isSSA() const { return IsSSA; } + bool isSSA() const { + return MF->getProperties().hasProperty( + MachineFunctionProperties::Property::IsSSA); + } // leaveSSA - Indicates that the machine function is no longer in SSA form. 
- void leaveSSA() { IsSSA = false; } + void leaveSSA() { + MF->getProperties().clear(MachineFunctionProperties::Property::IsSSA); + } /// tracksLiveness - Returns true when tracking register liveness accurately. - /// - /// While this flag is true, register liveness information in basic block - /// live-in lists and machine instruction operands is accurate. This means it - /// can be used to change the code in ways that affect the values in - /// registers, for example by the register scavenger. - /// - /// When this flag is false, liveness is no longer reliable. - bool tracksLiveness() const { return TracksLiveness; } + /// (see MachineFUnctionProperties::Property description for details) + bool tracksLiveness() const { + return MF->getProperties().hasProperty( + MachineFunctionProperties::Property::TracksLiveness); + } /// invalidateLiveness - Indicates that register liveness is no longer being /// tracked accurately. /// /// This should be called by late passes that invalidate the liveness /// information. - void invalidateLiveness() { TracksLiveness = false; } + void invalidateLiveness() { + MF->getProperties().clear( + MachineFunctionProperties::Property::TracksLiveness); + } /// Returns true if liveness for register class @p RC should be tracked at /// the subregister level. @@ -375,8 +391,8 @@ public: /// specified register (it may be live-in). bool def_empty(unsigned RegNo) const { return def_begin(RegNo) == def_end(); } - /// hasOneDef - Return true if there is exactly one instruction defining the - /// specified register. + /// Return true if there is exactly one operand defining the specified + /// register. bool hasOneDef(unsigned RegNo) const { def_iterator DI = def_begin(RegNo); if (DI == def_end()) @@ -551,9 +567,47 @@ public: // Virtual Register Info //===--------------------------------------------------------------------===// - /// getRegClass - Return the register class of the specified virtual register. 
+ /// Return the register class of the specified virtual register. + /// This shouldn't be used directly unless \p Reg has a register class. + /// \see getRegClassOrNull when this might happen. /// const TargetRegisterClass *getRegClass(unsigned Reg) const { + assert(VRegInfo[Reg].first.is<const TargetRegisterClass *>() && + "Register class not set, wrong accessor"); + return VRegInfo[Reg].first.get<const TargetRegisterClass *>(); + } + + /// Return the register class of \p Reg, or null if Reg has not been assigned + /// a register class yet. + /// + /// \note A null register class can only happen when these two + /// conditions are met: + /// 1. Generic virtual registers are created. + /// 2. The machine function has not completely been through the + /// instruction selection process. + /// None of this condition is possible without GlobalISel for now. + /// In other words, if GlobalISel is not used or if the query happens after + /// the select pass, using getRegClass is safe. + const TargetRegisterClass *getRegClassOrNull(unsigned Reg) const { + const RegClassOrRegBank &Val = VRegInfo[Reg].first; + return Val.dyn_cast<const TargetRegisterClass *>(); + } + + /// Return the register bank of \p Reg, or null if Reg has not been assigned + /// a register bank or has been assigned a register class. + /// \note It is possible to get the register bank from the register class via + /// RegisterBankInfo::getRegBankFromRegClass. + /// + const RegisterBank *getRegBankOrNull(unsigned Reg) const { + const RegClassOrRegBank &Val = VRegInfo[Reg].first; + return Val.dyn_cast<const RegisterBank *>(); + } + + /// Return the register bank or register class of \p Reg. + /// \note Before the register bank gets assigned (i.e., before the + /// RegBankSelect pass) \p Reg may not have either. 
+ /// + const RegClassOrRegBank &getRegClassOrRegBank(unsigned Reg) const { return VRegInfo[Reg].first; } @@ -561,6 +615,10 @@ public: /// void setRegClass(unsigned Reg, const TargetRegisterClass *RC); + /// Set the register bank to \p RegBank for \p Reg. + /// + void setRegBank(unsigned Reg, const RegisterBank &RegBank); + /// constrainRegClass - Constrain the register class of the specified virtual /// register to be a common subclass of RC and the current register class, /// but only if the new class has at least MinNumRegs registers. Return the @@ -587,6 +645,19 @@ public: /// unsigned createVirtualRegister(const TargetRegisterClass *RegClass); + /// Get the size in bits of \p VReg or 0 if VReg is not a generic + /// (target independent) virtual register. + unsigned getSize(unsigned VReg) const; + + /// Set the size in bits of \p VReg to \p Size. + /// Although the size should be set at build time, mir infrastructure + /// is not yet able to do it. + void setSize(unsigned VReg, unsigned Size); + + /// Create and return a new generic virtual register with a size of \p Size. + /// \pre Size > 0. + unsigned createGenericVirtualRegister(unsigned Size); + /// getNumVirtRegs - Return the number of virtual registers created. /// unsigned getNumVirtRegs() const { return VRegInfo.size(); } @@ -632,9 +703,10 @@ public: /// Return true if the specified register is modified in this function. /// This checks that no defining machine operands exist for the register or /// any of its aliases. Definitions found on functions marked noreturn are - /// ignored. The register is also considered modified when it is set in the - /// UsedPhysRegMask. - bool isPhysRegModified(unsigned PhysReg) const; + /// ignored, to consider them pass 'true' for optional parameter + /// SkipNoReturnDef. The register is also considered modified when it is set + /// in the UsedPhysRegMask. 
+ bool isPhysRegModified(unsigned PhysReg, bool SkipNoReturnDef = false) const; /// Return true if the specified register is modified or read in this /// function. This checks that no machine operands exist for the register or @@ -820,10 +892,10 @@ public: advance(); } while (Op && Op->getParent() == P); } else if (ByBundle) { - MachineInstr *P = getBundleStart(Op->getParent()); + MachineInstr &P = getBundleStart(*Op->getParent()); do { advance(); - } while (Op && getBundleStart(Op->getParent()) == P); + } while (Op && &getBundleStart(*Op->getParent()) == &P); } return *this; @@ -922,10 +994,10 @@ public: advance(); } while (Op && Op->getParent() == P); } else if (ByBundle) { - MachineInstr *P = getBundleStart(Op->getParent()); + MachineInstr &P = getBundleStart(*Op->getParent()); do { advance(); - } while (Op && getBundleStart(Op->getParent()) == P); + } while (Op && &getBundleStart(*Op->getParent()) == &P); } return *this; @@ -937,15 +1009,12 @@ public: // Retrieve a reference to the current operand. 
MachineInstr &operator*() const { assert(Op && "Cannot dereference end iterator!"); - if (ByBundle) return *(getBundleStart(Op->getParent())); + if (ByBundle) + return getBundleStart(*Op->getParent()); return *Op->getParent(); } - MachineInstr *operator->() const { - assert(Op && "Cannot dereference end iterator!"); - if (ByBundle) return getBundleStart(Op->getParent()); - return Op->getParent(); - } + MachineInstr *operator->() const { return &operator*(); } }; }; diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h index 5f988ad86320f..50a7d90bf25b0 100644 --- a/include/llvm/CodeGen/MachineSSAUpdater.h +++ b/include/llvm/CodeGen/MachineSSAUpdater.h @@ -14,7 +14,6 @@ #ifndef LLVM_CODEGEN_MACHINESSAUPDATER_H #define LLVM_CODEGEN_MACHINESSAUPDATER_H -#include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" namespace llvm { diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 358fd5a3732a6..06e9921790316 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -66,8 +66,6 @@ // // void <SubTarget>Subtarget:: // overrideSchedPolicy(MachineSchedPolicy &Policy, -// MachineInstr *begin, -// MachineInstr *end, // unsigned NumRegionInstrs) const { // Policy.<Flag> = true; // } @@ -81,6 +79,7 @@ #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" #include <memory> namespace llvm { @@ -150,6 +149,9 @@ class ScheduleDAGMI; struct MachineSchedPolicy { // Allow the scheduler to disable register pressure tracking. bool ShouldTrackPressure; + /// Track LaneMasks to allow reordering of independent subregister writes + /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks() + bool ShouldTrackLaneMasks; // Allow the scheduler to force top-down or bottom-up scheduling. 
If neither // is true, the scheduler runs in both directions and converges. @@ -160,8 +162,8 @@ struct MachineSchedPolicy { // first. bool DisableLatencyHeuristic; - MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false), - OnlyBottomUp(false), DisableLatencyHeuristic(false) {} + MachineSchedPolicy(): ShouldTrackPressure(false), ShouldTrackLaneMasks(false), + OnlyTopDown(false), OnlyBottomUp(false), DisableLatencyHeuristic(false) {} }; /// MachineSchedStrategy - Interface to the scheduling algorithm used by @@ -185,6 +187,11 @@ public: /// initializing this strategy. Called after initPolicy. virtual bool shouldTrackPressure() const { return true; } + /// Returns true if lanemasks should be tracked. LaneMask tracking is + /// necessary to reorder independent subregister defs for the same vreg. + /// This has to be enabled in combination with shouldTrackPressure(). + virtual bool shouldTrackLaneMasks() const { return false; } + /// Initialize the strategy after building the DAG for a new region. virtual void initialize(ScheduleDAGMI *DAG) = 0; @@ -212,15 +219,6 @@ public: virtual void releaseBottomNode(SUnit *SU) = 0; }; -/// Mutate the DAG as a postpass after normal DAG building. -class ScheduleDAGMutation { - virtual void anchor(); -public: - virtual ~ScheduleDAGMutation() {} - - virtual void apply(ScheduleDAGMI *DAG) = 0; -}; - /// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply /// schedules machine instructions according to the given MachineSchedStrategy /// without much extra book-keeping. This is the common functionality between @@ -371,6 +369,7 @@ protected: /// Register pressure in this region computed by initRegPressure. bool ShouldTrackPressure; + bool ShouldTrackLaneMasks; IntervalPressure RegPressure; RegPressureTracker RPTracker; @@ -387,13 +386,18 @@ protected: IntervalPressure BotPressure; RegPressureTracker BotRPTracker; + /// True if disconnected subregister components are already renamed. 
+ /// The renaming is only done on demand if lane masks are tracked. + bool DisconnectedComponentsRenamed; + public: ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S) : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false), RegClassInfo(C->RegClassInfo), DFSResult(nullptr), - ShouldTrackPressure(false), RPTracker(RegPressure), - TopRPTracker(TopPressure), BotRPTracker(BotPressure) {} + ShouldTrackPressure(false), ShouldTrackLaneMasks(false), + RPTracker(RegPressure), TopRPTracker(TopPressure), + BotRPTracker(BotPressure), DisconnectedComponentsRenamed(false) {} ~ScheduleDAGMILive() override; @@ -455,6 +459,10 @@ protected: /// bottom of the DAG region without covereing any unscheduled instruction. void buildDAGWithRegPressure(); + /// Release ExitSU predecessors and setup scheduler queues. Re-position + /// the Top RP tracker in case the region beginning has changed. + void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); + /// Move an instruction and update register pressure. void scheduleMI(SUnit *SU, bool IsTopNode); @@ -462,7 +470,7 @@ protected: void initRegPressure(); - void updatePressureDiffs(ArrayRef<unsigned> LiveUses); + void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses); void updateScheduledPressure(const SUnit *SU, const std::vector<unsigned> &NewMaxPressure); @@ -753,9 +761,9 @@ class GenericSchedulerBase : public MachineSchedStrategy { public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. 
- enum CandReason { - NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax, - ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, + enum CandReason : uint8_t { + NoCand, Only1, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, + RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; #ifndef NDEBUG @@ -769,6 +777,15 @@ public: unsigned DemandResIdx; CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} + + bool operator==(const CandPolicy &RHS) const { + return ReduceLatency == RHS.ReduceLatency && + ReduceResIdx == RHS.ReduceResIdx && + DemandResIdx == RHS.DemandResIdx; + } + bool operator!=(const CandPolicy &RHS) const { + return !(*this == RHS); + } }; /// Status of an instruction's critical resource consumption. @@ -801,8 +818,8 @@ public: // The reason for this candidate. CandReason Reason; - // Set of reasons that apply to multiple candidates. - uint32_t RepeatReasonSet; + // Whether this candidate should be scheduled at top/bottom. + bool AtTop; // Register pressure values for the best candidate. RegPressureDelta RPDelta; @@ -810,8 +827,17 @@ public: // Critical resource consumption of the best candidate. 
SchedResourceDelta ResDelta; - SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {} + SchedCandidate() { reset(CandPolicy()); } + SchedCandidate(const CandPolicy &Policy) { reset(Policy); } + + void reset(const CandPolicy &NewPolicy) { + Policy = NewPolicy; + SU = nullptr; + Reason = NoCand; + AtTop = false; + RPDelta = RegPressureDelta(); + ResDelta = SchedResourceDelta(); + } bool isValid() const { return SU; } @@ -820,13 +846,11 @@ public: assert(Best.Reason != NoCand && "uninitialized Sched candidate"); SU = Best.SU; Reason = Best.Reason; + AtTop = Best.AtTop; RPDelta = Best.RPDelta; ResDelta = Best.ResDelta; } - bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } - void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } - void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); }; @@ -858,6 +882,11 @@ class GenericScheduler : public GenericSchedulerBase { SchedBoundary Top; SchedBoundary Bot; + /// Candidate last picked from Top boundary. + SchedCandidate TopCand; + /// Candidate last picked from Bot boundary. 
+ SchedCandidate BotCand; + MachineSchedPolicy RegionPolicy; public: GenericScheduler(const MachineSchedContext *C): @@ -874,6 +903,10 @@ public: return RegionPolicy.ShouldTrackPressure; } + bool shouldTrackLaneMasks() const override { + return RegionPolicy.ShouldTrackLaneMasks; + } + void initialize(ScheduleDAGMI *dag) override; SUnit *pickNode(bool &IsTopNode) override; @@ -882,10 +915,12 @@ public: void releaseTopNode(SUnit *SU) override { Top.releaseTopNode(SU); + TopCand.SU = nullptr; } void releaseBottomNode(SUnit *SU) override { Bot.releaseBottomNode(SU); + BotCand.SU = nullptr; } void registerRoots() override; @@ -893,15 +928,18 @@ public: protected: void checkAcyclicLatency(); + void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker); + void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, - SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - RegPressureTracker &TempTracker); + SchedBoundary *Zone); SUnit *pickNodeBidirectional(bool &IsTopNode); void pickNodeFromQueue(SchedBoundary &Zone, + const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate); diff --git a/include/llvm/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h index bfe6e945b6da6..06db17abaed91 100644 --- a/include/llvm/CodeGen/MachineTraceMetrics.h +++ b/include/llvm/CodeGen/MachineTraceMetrics.h @@ -50,6 +50,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetSchedule.h" namespace llvm { @@ -275,24 +276,24 @@ public: /// Return the depth and height of MI. The depth is only valid for /// instructions in or above the trace center block. The height is only /// valid for instructions in or below the trace center block. 
- InstrCycles getInstrCycles(const MachineInstr *MI) const { - return TE.Cycles.lookup(MI); + InstrCycles getInstrCycles(const MachineInstr &MI) const { + return TE.Cycles.lookup(&MI); } /// Return the slack of MI. This is the number of cycles MI can be delayed /// before the critical path becomes longer. /// MI must be an instruction in the trace center block. - unsigned getInstrSlack(const MachineInstr *MI) const; + unsigned getInstrSlack(const MachineInstr &MI) const; /// Return the Depth of a PHI instruction in a trace center block successor. /// The PHI does not have to be part of the trace. - unsigned getPHIDepth(const MachineInstr *PHI) const; + unsigned getPHIDepth(const MachineInstr &PHI) const; /// A dependence is useful if the basic block of the defining instruction /// is part of the trace of the user instruction. It is assumed that DefMI /// dominates UseMI (see also isUsefulDominator). - bool isDepInTrace(const MachineInstr *DefMI, - const MachineInstr *UseMI) const; + bool isDepInTrace(const MachineInstr &DefMI, + const MachineInstr &UseMI) const; }; /// A trace ensemble is a collection of traces selected using the same diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h index 04d6ee3be531a..0bb53d1a53743 100644 --- a/include/llvm/CodeGen/MachineValueType.h +++ b/include/llvm/CodeGen/MachineValueType.h @@ -28,7 +28,7 @@ namespace llvm { /// type can be represented by an MVT. class MVT { public: - enum SimpleValueType { + enum SimpleValueType : int8_t { // INVALID_SIMPLE_VALUE_TYPE - Simple value types less than zero are // considered extended value types. INVALID_SIMPLE_VALUE_TYPE = -1, @@ -142,38 +142,38 @@ class MVT { MAX_ALLOWED_VALUETYPE = 96, // Token - A value of type llvm::TokenTy - token = 249, + token = 120, // Metadata - This is MDNode or MDString. - Metadata = 250, + Metadata = 121, // iPTRAny - An int value the size of the pointer of the current // target to any address space. 
This must only be used internal to // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR. - iPTRAny = 251, + iPTRAny = 122, // vAny - A vector with any length and element size. This is used // for intrinsics that have overloadings based on vector types. // This is only for tblgen's consumption! - vAny = 252, + vAny = 123, // fAny - Any floating-point or vector floating-point value. This is used // for intrinsics that have overloadings based on floating-point types. // This is only for tblgen's consumption! - fAny = 253, + fAny = 124, // iAny - An integer or vector integer value of any bit width. This is // used for intrinsics that have overloadings based on integer bit widths. // This is only for tblgen's consumption! - iAny = 254, + iAny = 125, // iPTR - An int value the size of the pointer of the current // target. This should only be used internal to tblgen! - iPTR = 255, + iPTR = 126, // Any - Any type. This is used for intrinsics that have overloadings. // This is only for tblgen's consumption! - Any = 256 + Any = 127 }; SimpleValueType SimpleTy; @@ -210,6 +210,13 @@ class MVT { SimpleTy <= MVT::LAST_INTEGER_VECTOR_VALUETYPE)); } + /// isScalarInteger - Return true if this is an integer, not including + /// vectors. + bool isScalarInteger() const { + return (SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_VALUETYPE); + } + /// isVector - Return true if this is a vector value type. 
bool isVector() const { return (SimpleTy >= MVT::FIRST_VECTOR_VALUETYPE && diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h index f73383ed10007..8301ca4d85366 100644 --- a/include/llvm/CodeGen/PBQP/Graph.h +++ b/include/llvm/CodeGen/PBQP/Graph.h @@ -15,12 +15,11 @@ #ifndef LLVM_CODEGEN_PBQP_GRAPH_H #define LLVM_CODEGEN_PBQP_GRAPH_H -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/Support/Debug.h" -#include <list> -#include <map> -#include <set> +#include <algorithm> +#include <cassert> +#include <limits> +#include <utility> #include <vector> namespace llvm { @@ -72,7 +71,7 @@ namespace PBQP { return std::numeric_limits<AdjEdgeIdx>::max(); } - NodeEntry(VectorPtr Costs) : Costs(Costs) {} + NodeEntry(VectorPtr Costs) : Costs(std::move(Costs)) {} AdjEdgeIdx addAdjEdgeId(EdgeId EId) { AdjEdgeIdx Idx = AdjEdgeIds.size(); @@ -103,7 +102,7 @@ namespace PBQP { class EdgeEntry { public: EdgeEntry(NodeId N1Id, NodeId N2Id, MatrixPtr Costs) - : Costs(Costs) { + : Costs(std::move(Costs)) { NIds[0] = N1Id; NIds[1] = N2Id; ThisEdgeAdjIdxs[0] = NodeEntry::getInvalidAdjEdgeIdx(); @@ -348,7 +347,8 @@ namespace PBQP { Graph() : Solver(nullptr) {} /// @brief Construct an empty PBQP graph with the given graph metadata. - Graph(GraphMetadata Metadata) : Metadata(Metadata), Solver(nullptr) {} + Graph(GraphMetadata Metadata) + : Metadata(std::move(Metadata)), Solver(nullptr) {} /// @brief Get a reference to the graph metadata. 
GraphMetadata& getMetadata() { return Metadata; } diff --git a/include/llvm/CodeGen/ParallelCG.h b/include/llvm/CodeGen/ParallelCG.h index fa7002fa21fb4..14ef0ec408ba1 100644 --- a/include/llvm/CodeGen/ParallelCG.h +++ b/include/llvm/CodeGen/ParallelCG.h @@ -14,29 +14,34 @@ #ifndef LLVM_CODEGEN_PARALLELCG_H #define LLVM_CODEGEN_PARALLELCG_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" +#include <functional> + namespace llvm { +template <typename T> class ArrayRef; class Module; class TargetOptions; class raw_pwrite_stream; -/// Split M into OSs.size() partitions, and generate code for each. Writes -/// OSs.size() output files to the output streams in OSs. The resulting output -/// files if linked together are intended to be equivalent to the single output -/// file that would have been code generated from M. +/// Split M into OSs.size() partitions, and generate code for each. Takes a +/// factory function for the TargetMachine TMFactory. Writes OSs.size() output +/// files to the output streams in OSs. The resulting output files if linked +/// together are intended to be equivalent to the single output file that would +/// have been code generated from M. +/// +/// Writes bitcode for individual partitions into output streams in BCOSs, if +/// BCOSs is not empty. /// /// \returns M if OSs.size() == 1, otherwise returns std::unique_ptr<Module>(). 
std::unique_ptr<Module> splitCodeGen(std::unique_ptr<Module> M, ArrayRef<raw_pwrite_stream *> OSs, - StringRef CPU, StringRef Features, const TargetOptions &Options, - Reloc::Model RM = Reloc::Default, - CodeModel::Model CM = CodeModel::Default, - CodeGenOpt::Level OL = CodeGenOpt::Default, - TargetMachine::CodeGenFileType FT = TargetMachine::CGFT_ObjectFile); + ArrayRef<llvm::raw_pwrite_stream *> BCOSs, + const std::function<std::unique_ptr<TargetMachine>()> &TMFactory, + TargetMachine::CodeGenFileType FT = TargetMachine::CGFT_ObjectFile, + bool PreserveLocals = false); } // namespace llvm diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index f45f0ed57d6b6..ae9e5dfe2d65e 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -15,352 +15,21 @@ #ifndef LLVM_CODEGEN_PASSES_H #define LLVM_CODEGEN_PASSES_H -#include "llvm/Pass.h" -#include "llvm/Target/TargetMachine.h" #include <functional> #include <string> namespace llvm { +class Function; +class FunctionPass; class MachineFunctionPass; -class PassConfigImpl; -class PassInfo; -class ScheduleDAGInstrs; -class TargetLowering; -class TargetLoweringBase; +class ModulePass; +class Pass; +class TargetMachine; class TargetRegisterClass; class raw_ostream; -struct MachineSchedContext; - -// The old pass manager infrastructure is hidden in a legacy namespace now. -namespace legacy { -class PassManagerBase; -} -using legacy::PassManagerBase; - -/// Discriminated union of Pass ID types. -/// -/// The PassConfig API prefers dealing with IDs because they are safer and more -/// efficient. IDs decouple configuration from instantiation. This way, when a -/// pass is overriden, it isn't unnecessarily instantiated. It is also unsafe to -/// refer to a Pass pointer after adding it to a pass manager, which deletes -/// redundant pass instances. -/// -/// However, it is convient to directly instantiate target passes with -/// non-default ctors. 
These often don't have a registered PassInfo. Rather than -/// force all target passes to implement the pass registry boilerplate, allow -/// the PassConfig API to handle either type. -/// -/// AnalysisID is sadly char*, so PointerIntPair won't work. -class IdentifyingPassPtr { - union { - AnalysisID ID; - Pass *P; - }; - bool IsInstance; -public: - IdentifyingPassPtr() : P(nullptr), IsInstance(false) {} - IdentifyingPassPtr(AnalysisID IDPtr) : ID(IDPtr), IsInstance(false) {} - IdentifyingPassPtr(Pass *InstancePtr) : P(InstancePtr), IsInstance(true) {} - - bool isValid() const { return P; } - bool isInstance() const { return IsInstance; } - - AnalysisID getID() const { - assert(!IsInstance && "Not a Pass ID"); - return ID; - } - Pass *getInstance() const { - assert(IsInstance && "Not a Pass Instance"); - return P; - } -}; - -template <> struct isPodLike<IdentifyingPassPtr> { - static const bool value = true; -}; - -/// Target-Independent Code Generator Pass Configuration Options. -/// -/// This is an ImmutablePass solely for the purpose of exposing CodeGen options -/// to the internals of other CodeGen passes. -class TargetPassConfig : public ImmutablePass { -public: - /// Pseudo Pass IDs. These are defined within TargetPassConfig because they - /// are unregistered pass IDs. They are only useful for use with - /// TargetPassConfig APIs to identify multiple occurrences of the same pass. - /// - - /// EarlyTailDuplicate - A clone of the TailDuplicate pass that runs early - /// during codegen, on SSA form. - static char EarlyTailDuplicateID; - - /// PostRAMachineLICM - A clone of the LICM pass that runs during late machine - /// optimization after regalloc. 
- static char PostRAMachineLICMID; - -private: - PassManagerBase *PM; - AnalysisID StartBefore, StartAfter; - AnalysisID StopAfter; - bool Started; - bool Stopped; - bool AddingMachinePasses; - -protected: - TargetMachine *TM; - PassConfigImpl *Impl; // Internal data structures - bool Initialized; // Flagged after all passes are configured. - - // Target Pass Options - // Targets provide a default setting, user flags override. - // - bool DisableVerify; - - /// Default setting for -enable-tail-merge on this target. - bool EnableTailMerge; - -public: - TargetPassConfig(TargetMachine *tm, PassManagerBase &pm); - // Dummy constructor. - TargetPassConfig(); - - ~TargetPassConfig() override; - - static char ID; - - /// Get the right type of TargetMachine for this target. - template<typename TMC> TMC &getTM() const { - return *static_cast<TMC*>(TM); - } - - // - void setInitialized() { Initialized = true; } - - CodeGenOpt::Level getOptLevel() const { return TM->getOptLevel(); } - - /// Set the StartAfter, StartBefore and StopAfter passes to allow running only - /// a portion of the normal code-gen pass sequence. - /// - /// If the StartAfter and StartBefore pass ID is zero, then compilation will - /// begin at the normal point; otherwise, clear the Started flag to indicate - /// that passes should not be added until the starting pass is seen. If the - /// Stop pass ID is zero, then compilation will continue to the end. - /// - /// This function expects that at least one of the StartAfter or the - /// StartBefore pass IDs is null. 
- void setStartStopPasses(AnalysisID StartBefore, AnalysisID StartAfter, - AnalysisID StopAfter) { - if (StartAfter) - assert(!StartBefore && "Start after and start before passes are given"); - this->StartBefore = StartBefore; - this->StartAfter = StartAfter; - this->StopAfter = StopAfter; - Started = (StartAfter == nullptr) && (StartBefore == nullptr); - } - - void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); } - bool getEnableTailMerge() const { return EnableTailMerge; } - void setEnableTailMerge(bool Enable) { setOpt(EnableTailMerge, Enable); } - - /// Allow the target to override a specific pass without overriding the pass - /// pipeline. When passes are added to the standard pipeline at the - /// point where StandardID is expected, add TargetID in its place. - void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID); - - /// Insert InsertedPassID pass after TargetPassID pass. - void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, - bool VerifyAfter = true, bool PrintAfter = true); - - /// Allow the target to enable a specific standard pass by default. - void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); } - - /// Allow the target to disable a specific standard pass by default. - void disablePass(AnalysisID PassID) { - substitutePass(PassID, IdentifyingPassPtr()); - } - - /// Return the pass substituted for StandardID by the target. - /// If no substitution exists, return StandardID. - IdentifyingPassPtr getPassSubstitution(AnalysisID StandardID) const; - - /// Return true if the optimized regalloc pipeline is enabled. - bool getOptimizeRegAlloc() const; - - /// Return true if shrink wrapping is enabled. - bool getEnableShrinkWrap() const; - - /// Return true if the default global register allocator is in use and - /// has not be overriden on the command line with '-regalloc=...' 
- bool usingDefaultRegAlloc() const; - - /// Add common target configurable passes that perform LLVM IR to IR - /// transforms following machine independent optimization. - virtual void addIRPasses(); - - /// Add passes to lower exception handling for the code generator. - void addPassesToHandleExceptions(); - - /// Add pass to prepare the LLVM IR for code generation. This should be done - /// before exception handling preparation passes. - virtual void addCodeGenPrepare(); - - /// Add common passes that perform LLVM IR to IR transforms in preparation for - /// instruction selection. - virtual void addISelPrepare(); - - /// addInstSelector - This method should install an instruction selector pass, - /// which converts from LLVM code to machine instructions. - virtual bool addInstSelector() { - return true; - } - - /// Add the complete, standard set of LLVM CodeGen passes. - /// Fully developed targets will not generally override this. - virtual void addMachinePasses(); - - /// Create an instance of ScheduleDAGInstrs to be run within the standard - /// MachineScheduler pass for this function and target at the current - /// optimization level. - /// - /// This can also be used to plug a new MachineSchedStrategy into an instance - /// of the standard ScheduleDAGMI: - /// return new ScheduleDAGMI(C, make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false) - /// - /// Return NULL to select the default (generic) machine scheduler. - virtual ScheduleDAGInstrs * - createMachineScheduler(MachineSchedContext *C) const { - return nullptr; - } - - /// Similar to createMachineScheduler but used when postRA machine scheduling - /// is enabled. - virtual ScheduleDAGInstrs * - createPostMachineScheduler(MachineSchedContext *C) const { - return nullptr; - } - -protected: - // Helper to verify the analysis is really immutable. 
- void setOpt(bool &Opt, bool Val); - - /// Methods with trivial inline returns are convenient points in the common - /// codegen pass pipeline where targets may insert passes. Methods with - /// out-of-line standard implementations are major CodeGen stages called by - /// addMachinePasses. Some targets may override major stages when inserting - /// passes is insufficient, but maintaining overriden stages is more work. - /// - - /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM - /// passes (which are run just before instruction selector). - virtual bool addPreISel() { - return true; - } - - /// addMachineSSAOptimization - Add standard passes that optimize machine - /// instructions in SSA form. - virtual void addMachineSSAOptimization(); - - /// Add passes that optimize instruction level parallelism for out-of-order - /// targets. These passes are run while the machine code is still in SSA - /// form, so they can use MachineTraceMetrics to control their heuristics. - /// - /// All passes added here should preserve the MachineDominatorTree, - /// MachineLoopInfo, and MachineTraceMetrics analyses. - virtual bool addILPOpts() { - return false; - } - - /// This method may be implemented by targets that want to run passes - /// immediately before register allocation. - virtual void addPreRegAlloc() { } - - /// createTargetRegisterAllocator - Create the register allocator pass for - /// this target at the current optimization level. - virtual FunctionPass *createTargetRegisterAllocator(bool Optimized); - - /// addFastRegAlloc - Add the minimum set of target-independent passes that - /// are required for fast register allocation. - virtual void addFastRegAlloc(FunctionPass *RegAllocPass); - - /// addOptimizedRegAlloc - Add passes related to register allocation. - /// LLVMTargetMachine provides standard regalloc passes for most targets. 
- virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass); - - /// addPreRewrite - Add passes to the optimized register allocation pipeline - /// after register allocation is complete, but before virtual registers are - /// rewritten to physical registers. - /// - /// These passes must preserve VirtRegMap and LiveIntervals, and when running - /// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix. - /// When these passes run, VirtRegMap contains legal physreg assignments for - /// all virtual registers. - virtual bool addPreRewrite() { - return false; - } - - /// This method may be implemented by targets that want to run passes after - /// register allocation pass pipeline but before prolog-epilog insertion. - virtual void addPostRegAlloc() { } - - /// Add passes that optimize machine instructions after register allocation. - virtual void addMachineLateOptimization(); - - /// This method may be implemented by targets that want to run passes after - /// prolog-epilog insertion and before the second instruction scheduling pass. - virtual void addPreSched2() { } - - /// addGCPasses - Add late codegen passes that analyze code for garbage - /// collection. This should return true if GC info should be printed after - /// these passes. - virtual bool addGCPasses(); - - /// Add standard basic block placement passes. - virtual void addBlockPlacement(); - - /// This pass may be implemented by targets that want to run passes - /// immediately before machine code is emitted. - virtual void addPreEmitPass() { } - - /// Utilities for targets to add passes to the pass manager. - /// - - /// Add a CodeGen pass at this point in the pipeline after checking overrides. - /// Return the pass that was added, or zero if no pass was added. 
- /// @p printAfter if true and adding a machine function pass add an extra - /// machine printer pass afterwards - /// @p verifyAfter if true and adding a machine function pass add an extra - /// machine verification pass afterwards. - AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true, - bool printAfter = true); - - /// Add a pass to the PassManager if that pass is supposed to be run, as - /// determined by the StartAfter and StopAfter options. Takes ownership of the - /// pass. - /// @p printAfter if true and adding a machine function pass add an extra - /// machine printer pass afterwards - /// @p verifyAfter if true and adding a machine function pass add an extra - /// machine verification pass afterwards. - void addPass(Pass *P, bool verifyAfter = true, bool printAfter = true); - - /// addMachinePasses helper to create the target-selected or overriden - /// regalloc pass. - FunctionPass *createRegAllocPass(bool Optimized); - - /// printAndVerify - Add a pass to dump then verify the machine function, if - /// those steps are enabled. - /// - void printAndVerify(const std::string &Banner); - - /// Add a pass to print the machine function if printing is enabled. - void addPrintPass(const std::string &Banner); - - /// Add a pass to perform basic verification of the machine function if - /// verification is enabled. - void addVerifyPass(const std::string &Banner); -}; -} // namespace llvm +} // End llvm namespace /// List of target independent CodeGen pass IDs. namespace llvm { @@ -457,6 +126,9 @@ namespace llvm { /// DeadMachineInstructionElim - This pass removes dead machine instructions. extern char &DeadMachineInstructionElimID; + /// This pass adds dead/undef flags after analyzing subregister lanes. + extern char &DetectDeadLanesID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. 
/// @@ -480,11 +152,16 @@ namespace llvm { /// PrologEpilogCodeInserter - This pass inserts prolog and epilog code, /// and eliminates abstract frame references. extern char &PrologEpilogCodeInserterID; + MachineFunctionPass *createPrologEpilogInserterPass(const TargetMachine *TM); /// ExpandPostRAPseudos - This pass expands pseudo instructions after /// register allocation. extern char &ExpandPostRAPseudosID; + /// createPostRAHazardRecognizer - This pass runs the post-ra hazard + /// recognizer. + extern char &PostRAHazardRecognizerID; + /// createPostRAScheduler - This pass performs post register allocation /// scheduling. extern char &PostRASchedulerID; @@ -586,6 +263,13 @@ namespace llvm { /// \brief This pass lays out funclets contiguously. extern char &FuncletLayoutID; + /// This pass inserts the XRay instrumentation sleds if they are supported by + /// the target platform. + extern char &XRayInstrumentationID; + + /// \brief This pass implements the "patchable-function" attribute. + extern char &PatchableFunctionID; + /// createStackProtectorPass - This pass adds stack protectors to functions. /// FunctionPass *createStackProtectorPass(const TargetMachine *TM); @@ -654,6 +338,42 @@ namespace llvm { /// memory accesses to target specific intrinsics. /// FunctionPass *createInterleavedAccessPass(const TargetMachine *TM); + + /// LowerEmuTLS - This pass generates __emutls_[vt].xyz variables for all + /// TLS variables for the emulated TLS model. + /// + ModulePass *createLowerEmuTLSPass(const TargetMachine *TM); + + /// This pass lowers the @llvm.load.relative intrinsic to instructions. + /// This is unsafe to do earlier because a pass may combine the constant + /// initializer into the load, which may result in an overflowing evaluation. + ModulePass *createPreISelIntrinsicLoweringPass(); + + /// GlobalMerge - This pass merges internal (by default) globals into structs + /// to enable reuse of a base pointer by indexed addressing modes. 
+ /// It can also be configured to focus on size optimizations only. + /// + Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, + bool OnlyOptimizeForSize = false, + bool MergeExternalByDefault = false); + + /// This pass splits the stack into a safe stack and an unsafe stack to + /// protect against stack-based overflow vulnerabilities. + FunctionPass *createSafeStackPass(const TargetMachine *TM = nullptr); + + /// This pass detects subregister lanes in a virtual register that are used + /// independently of other lanes and splits them into separate virtual + /// registers. + extern char &RenameIndependentSubregsID; + + /// This pass is executed POST-RA to collect which physical registers are + /// preserved by given machine function. + FunctionPass *createRegUsageInfoCollector(); + + /// Return a MachineFunction pass that identifies call sites + /// and propagates register usage information of callee to caller + /// if available with PysicalRegisterUsageInfo pass. + FunctionPass *createRegUsageInfoPropPass(); } // End llvm namespace /// Target machine pass initializer for passes with dependencies. Use with @@ -662,15 +382,18 @@ namespace llvm { /// Target machine pass initializer for passes with dependencies. Use with /// INITIALIZE_TM_PASS_BEGIN. 
-#define INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis) \ - PassInfo *PI = new PassInfo(name, arg, & passName ::ID, \ - PassInfo::NormalCtor_t(callDefaultCtor< passName >), cfg, analysis, \ - PassInfo::TargetMachineCtor_t(callTargetMachineCtor< passName >)); \ - Registry.registerPass(*PI, true); \ - return PI; \ - } \ - void llvm::initialize##passName##Pass(PassRegistry &Registry) { \ - CALL_ONCE_INITIALIZATION(initialize##passName##PassOnce) \ +#define INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis) \ + PassInfo *PI = new PassInfo( \ + name, arg, &passName::ID, \ + PassInfo::NormalCtor_t(callDefaultCtor<passName>), cfg, analysis, \ + PassInfo::TargetMachineCtor_t(callTargetMachineCtor<passName>)); \ + Registry.registerPass(*PI, true); \ + return PI; \ + } \ + LLVM_DEFINE_ONCE_FLAG(Initialize##passName##PassFlag); \ + void llvm::initialize##passName##Pass(PassRegistry &Registry) { \ + llvm::call_once(Initialize##passName##PassFlag, \ + initialize##passName##PassOnce, std::ref(Registry)); \ } /// This initializer registers TargetMachine constructor, so the pass being @@ -678,8 +401,8 @@ namespace llvm { /// macro to be together with INITIALIZE_PASS, which is a complete target /// independent initializer, and we don't want to make libScalarOpts depend /// on libCodeGen. 
-#define INITIALIZE_TM_PASS(passName, arg, name, cfg, analysis) \ - INITIALIZE_TM_PASS_BEGIN(passName, arg, name, cfg, analysis) \ - INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis) +#define INITIALIZE_TM_PASS(passName, arg, name, cfg, analysis) \ + INITIALIZE_TM_PASS_BEGIN(passName, arg, name, cfg, analysis) \ + INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis) #endif diff --git a/include/llvm/CodeGen/PreISelIntrinsicLowering.h b/include/llvm/CodeGen/PreISelIntrinsicLowering.h new file mode 100644 index 0000000000000..765ca085244aa --- /dev/null +++ b/include/llvm/CodeGen/PreISelIntrinsicLowering.h @@ -0,0 +1,26 @@ +//===--- PreISelIntrinsicLowering.h - Pre-ISel intrinsic lowering pass ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements IR lowering for the llvm.load.relative intrinsic. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_PREISELINTRINSICLOWERING_H +#define LLVM_CODEGEN_PREISELINTRINSICLOWERING_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct PreISelIntrinsicLoweringPass + : PassInfoMixin<PreISelIntrinsicLoweringPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; +} + +#endif // LLVM_CODEGEN_PREISELINTRINSICLOWERING_H diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h index f67552030db4f..c3f6fde9fb3fa 100644 --- a/include/llvm/CodeGen/PseudoSourceValue.h +++ b/include/llvm/CodeGen/PseudoSourceValue.h @@ -27,6 +27,8 @@ class MachineMemOperand; class raw_ostream; raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO); +class PseudoSourceValue; +raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV); /// Special value supplied for machine level alias analysis. It indicates that /// a memory access references the functions stack frame (e.g., a spill slot), @@ -45,6 +47,8 @@ public: private: PSVKind Kind; + friend raw_ostream &llvm::operator<<(raw_ostream &OS, + const PseudoSourceValue* PSV); friend class MachineMemOperand; // For printCustom(). 
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h index 4122811a9e5ce..21952272ffdb9 100644 --- a/include/llvm/CodeGen/RegAllocPBQP.h +++ b/include/llvm/CodeGen/RegAllocPBQP.h @@ -21,6 +21,7 @@ #include "llvm/CodeGen/PBQP/ReductionRules.h" #include "llvm/CodeGen/PBQPRAConstraint.h" #include "llvm/Support/ErrorHandling.h" +#include <set> namespace llvm { diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 9bbdf3e071bd6..aaddac40ca768 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -26,14 +26,22 @@ class LiveRange; class RegisterClassInfo; class MachineInstr; +struct RegisterMaskPair { + unsigned RegUnit; ///< Virtual register or register unit. + LaneBitmask LaneMask; + + RegisterMaskPair(unsigned RegUnit, LaneBitmask LaneMask) + : RegUnit(RegUnit), LaneMask(LaneMask) {} +}; + /// Base class for register pressure results. struct RegisterPressure { /// Map of max reg pressure indexed by pressure set ID, not class ID. std::vector<unsigned> MaxSetPressure; /// List of live in virtual registers or physical register units. - SmallVector<unsigned,8> LiveInRegs; - SmallVector<unsigned,8> LiveOutRegs; + SmallVector<RegisterMaskPair,8> LiveInRegs; + SmallVector<RegisterMaskPair,8> LiveOutRegs; void dump(const TargetRegisterInfo *TRI) const; }; @@ -144,23 +152,32 @@ public: /// List of registers defined and used by a machine instruction. class RegisterOperands { public: - /// List of virtual regiserts and register units read by the instruction. - SmallVector<unsigned, 8> Uses; + /// List of virtual registers and register units read by the instruction. + SmallVector<RegisterMaskPair, 8> Uses; /// \brief List of virtual registers and register units defined by the /// instruction which are not dead. 
- SmallVector<unsigned, 8> Defs; + SmallVector<RegisterMaskPair, 8> Defs; /// \brief List of virtual registers and register units defined by the /// instruction but dead. - SmallVector<unsigned, 8> DeadDefs; + SmallVector<RegisterMaskPair, 8> DeadDefs; /// Analyze the given instruction \p MI and fill in the Uses, Defs and /// DeadDefs list based on the MachineOperand flags. void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, - const MachineRegisterInfo &MRI, bool IgnoreDead = false); + const MachineRegisterInfo &MRI, bool TrackLaneMasks, + bool IgnoreDead); /// Use liveness information to find dead defs not marked with a dead flag /// and move them to the DeadDefs vector. void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); + + /// Use liveness information to find out which uses/defs are partially + /// undefined/dead and adjust the RegisterMaskPairs accordingly. + /// If \p AddFlagsMI is given then missing read-undef and dead flags will be + /// added to the instruction. + void adjustLaneLiveness(const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, SlotIndex Pos, + MachineInstr *AddFlagsMI = nullptr); }; /// Array of PressureDiffs. @@ -225,7 +242,20 @@ struct RegPressureDelta { /// and virtual register indexes to an index usable by the sparse set. 
class LiveRegSet { private: - SparseSet<unsigned> Regs; + struct IndexMaskPair { + unsigned Index; + LaneBitmask LaneMask; + + IndexMaskPair(unsigned Index, LaneBitmask LaneMask) + : Index(Index), LaneMask(LaneMask) {} + + unsigned getSparseSetIndex() const { + return Index; + } + }; + + typedef SparseSet<IndexMaskPair> RegSet; + RegSet Regs; unsigned NumRegUnits; unsigned getSparseIndexFromReg(unsigned Reg) const { @@ -244,19 +274,37 @@ public: void clear(); void init(const MachineRegisterInfo &MRI); - bool contains(unsigned Reg) const { + LaneBitmask contains(unsigned Reg) const { unsigned SparseIndex = getSparseIndexFromReg(Reg); - return Regs.count(SparseIndex); + RegSet::const_iterator I = Regs.find(SparseIndex); + if (I == Regs.end()) + return 0; + return I->LaneMask; } - bool insert(unsigned Reg) { - unsigned SparseIndex = getSparseIndexFromReg(Reg); - return Regs.insert(SparseIndex).second; + /// Mark the \p Pair.LaneMask lanes of \p Pair.Reg as live. + /// Returns the previously live lanes of \p Pair.Reg. + LaneBitmask insert(RegisterMaskPair Pair) { + unsigned SparseIndex = getSparseIndexFromReg(Pair.RegUnit); + auto InsertRes = Regs.insert(IndexMaskPair(SparseIndex, Pair.LaneMask)); + if (!InsertRes.second) { + unsigned PrevMask = InsertRes.first->LaneMask; + InsertRes.first->LaneMask |= Pair.LaneMask; + return PrevMask; + } + return 0; } - bool erase(unsigned Reg) { - unsigned SparseIndex = getSparseIndexFromReg(Reg); - return Regs.erase(SparseIndex); + /// Clears the \p Pair.LaneMask lanes of \p Pair.Reg (mark them as dead). + /// Returns the previously live lanes of \p Pair.Reg. 
+ LaneBitmask erase(RegisterMaskPair Pair) { + unsigned SparseIndex = getSparseIndexFromReg(Pair.RegUnit); + RegSet::iterator I = Regs.find(SparseIndex); + if (I == Regs.end()) + return 0; + unsigned PrevMask = I->LaneMask; + I->LaneMask &= ~Pair.LaneMask; + return PrevMask; } size_t size() const { @@ -265,9 +313,10 @@ public: template<typename ContainerT> void appendTo(ContainerT &To) const { - for (unsigned I : Regs) { - unsigned Reg = getRegFromSparseIndex(I); - To.push_back(Reg); + for (const IndexMaskPair &P : Regs) { + unsigned Reg = getRegFromSparseIndex(P.Index); + if (P.LaneMask != 0) + To.push_back(RegisterMaskPair(Reg, P.LaneMask)); } } }; @@ -308,6 +357,9 @@ class RegPressureTracker { /// True if UntiedDefs will be populated. bool TrackUntiedDefs; + /// True if lanemasks should be tracked. + bool TrackLaneMasks; + /// Register pressure corresponds to liveness before this instruction /// iterator. It may point to the end of the block or a DebugValue rather than /// an instruction. @@ -327,23 +379,23 @@ class RegPressureTracker { public: RegPressureTracker(IntervalPressure &rp) : MF(nullptr), TRI(nullptr), RCI(nullptr), LIS(nullptr), MBB(nullptr), P(rp), - RequireIntervals(true), TrackUntiedDefs(false) {} + RequireIntervals(true), TrackUntiedDefs(false), TrackLaneMasks(false) {} RegPressureTracker(RegionPressure &rp) : MF(nullptr), TRI(nullptr), RCI(nullptr), LIS(nullptr), MBB(nullptr), P(rp), - RequireIntervals(false), TrackUntiedDefs(false) {} + RequireIntervals(false), TrackUntiedDefs(false), TrackLaneMasks(false) {} void reset(); void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, - bool ShouldTrackUntiedDefs = false); + bool TrackLaneMasks, bool TrackUntiedDefs); /// Force liveness of virtual registers or physical register /// units. 
Particularly useful to initialize the livein/out state of the /// tracker before the first call to advance/recede. - void addLiveRegs(ArrayRef<unsigned> Regs); + void addLiveRegs(ArrayRef<RegisterMaskPair> Regs); /// Get the MI position corresponding to this register pressure. MachineBasicBlock::const_iterator getPos() const { return CurrPos; } @@ -355,14 +407,14 @@ public: void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; } /// Recede across the previous instruction. - void recede(SmallVectorImpl<unsigned> *LiveUses = nullptr); + void recede(SmallVectorImpl<RegisterMaskPair> *LiveUses = nullptr); /// Recede across the previous instruction. /// This "low-level" variant assumes that recedeSkipDebugValues() was /// called previously and takes precomputed RegisterOperands for the /// instruction. void recede(const RegisterOperands &RegOpers, - SmallVectorImpl<unsigned> *LiveUses = nullptr); + SmallVectorImpl<RegisterMaskPair> *LiveUses = nullptr); /// Recede until we find an instruction which is not a DebugValue. void recedeSkipDebugValues(); @@ -370,6 +422,11 @@ public: /// Advance across the current instruction. void advance(); + /// Advance across the current instruction. + /// This is a "low-level" variant of advance() which takes precomputed + /// RegisterOperands of the instruction. + void advance(const RegisterOperands &RegOpers); + /// Finalize the region boundaries and recored live ins and live outs. void closeRegion(); @@ -469,18 +526,31 @@ public: void dump() const; protected: - void discoverLiveOut(unsigned Reg); - void discoverLiveIn(unsigned Reg); + /// Add Reg to the live out set and increase max pressure. + void discoverLiveOut(RegisterMaskPair Pair); + /// Add Reg to the live in set and increase max pressure. + void discoverLiveIn(RegisterMaskPair Pair); /// \brief Get the SlotIndex for the first nondebug instruction including or /// after the current position. 
SlotIndex getCurrSlot() const; - void increaseRegPressure(ArrayRef<unsigned> Regs); - void decreaseRegPressure(ArrayRef<unsigned> Regs); + void increaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, + LaneBitmask NewMask); + void decreaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, + LaneBitmask NewMask); + + void bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs); void bumpUpwardPressure(const MachineInstr *MI); void bumpDownwardPressure(const MachineInstr *MI); + + void discoverLiveInOrOut(RegisterMaskPair Pair, + SmallVectorImpl<RegisterMaskPair> &LiveInOrOut); + + LaneBitmask getLastUsedLanes(unsigned RegUnit, SlotIndex Pos) const; + LaneBitmask getLiveLanesAt(unsigned RegUnit, SlotIndex Pos) const; + LaneBitmask getLiveThroughAt(unsigned RegUnit, SlotIndex Pos) const; }; void dumpRegSetPressure(ArrayRef<unsigned> SetPressure, diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 122c78534253e..efe1a3c6d0f78 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This file declares the machine register scavenger class. It can provide -// information such as unused register at any point in a machine basic block. -// It also provides a mechanism to make registers available by evicting them -// to spill slots. +/// \file +/// This file declares the machine register scavenger class. It can provide +/// information such as unused register at any point in a machine basic block. +/// It also provides a mechanism to make registers available by evicting them +/// to spill slots. // //===----------------------------------------------------------------------===// @@ -71,8 +72,8 @@ public: RegScavenger() : MBB(nullptr), NumRegUnits(0), Tracking(false) {} - /// Start tracking liveness from the begin of the specific basic block. 
- void enterBasicBlock(MachineBasicBlock *mbb); + /// Start tracking liveness from the begin of basic block \p MBB. + void enterBasicBlock(MachineBasicBlock &MBB); /// Move the internal MBB iterator and update register states. void forward(); diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h new file mode 100644 index 0000000000000..3f88032cb6385 --- /dev/null +++ b/include/llvm/CodeGen/RegisterUsageInfo.h @@ -0,0 +1,75 @@ +//==- RegisterUsageInfo.h - Register Usage Informartion Storage -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This pass is required to take advantage of the interprocedural register +/// allocation infrastructure. +/// +/// This pass is simple immutable pass which keeps RegMasks (calculated based on +/// actual register allocation) for functions in a module and provides simple +/// API to query this information. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H +#define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class PhysicalRegisterUsageInfo : public ImmutablePass { + virtual void anchor(); + +public: + static char ID; + + PhysicalRegisterUsageInfo() : ImmutablePass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializePhysicalRegisterUsageInfoPass(Registry); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + /// To set TargetMachine *, which is used to print + /// analysis when command line option -print-regusage is used. + void setTargetMachine(const TargetMachine *TM_) { TM = TM_; } + + bool doInitialization(Module &M) override; + + bool doFinalization(Module &M) override; + + /// To store RegMask for given Function *. + void storeUpdateRegUsageInfo(const Function *FP, + std::vector<uint32_t> RegMask); + + /// To query stored RegMask for given Function *, it will return nullptr if + /// function is not known. + const std::vector<uint32_t> *getRegUsageInfo(const Function *FP); + + void print(raw_ostream &OS, const Module *M = nullptr) const override; + +private: + /// A Dense map from Function * to RegMask. + /// In RegMask 0 means register used (clobbered) by function. + /// and 1 means content of register will be preserved around function call. 
+ DenseMap<const Function *, std::vector<uint32_t>> RegMasks; + + const TargetMachine *TM; +}; +} + +#endif diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h index 0097e0472e5c5..9c8f5f487d382 100644 --- a/include/llvm/CodeGen/ResourcePriorityQueue.h +++ b/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -72,7 +72,7 @@ namespace llvm { /// Heuristics for estimating register pressure. unsigned ParallelLiveRanges; - signed HorizontalVerticalBalance; + int HorizontalVerticalBalance; public: ResourcePriorityQueue(SelectionDAGISel *IS); @@ -103,14 +103,14 @@ namespace llvm { /// Single cost function reflecting benefit of scheduling SU /// in the current cycle. - signed SUSchedulingCost (SUnit *SU); + int SUSchedulingCost (SUnit *SU); /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void initNumRegDefsLeft(SUnit *SU); void updateNumRegDefsLeft(SUnit *SU); - signed regPressureDelta(SUnit *SU, bool RawPressure = false); - signed rawRegPressureDelta (SUnit *SU, unsigned RCId); + int regPressureDelta(SUnit *SU, bool RawPressure = false); + int rawRegPressureDelta (SUnit *SU, unsigned RCId); bool empty() const override { return Queue.empty(); } diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 7db03459f9bf6..16d305c7297f9 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -215,6 +215,8 @@ namespace RTLIB { FMAX_PPCF128, // CONVERSION + FPEXT_F32_PPCF128, + FPEXT_F64_PPCF128, FPEXT_F64_F128, FPEXT_F32_F128, FPEXT_F32_F64, @@ -296,27 +298,35 @@ namespace RTLIB { OEQ_F32, OEQ_F64, OEQ_F128, + OEQ_PPCF128, UNE_F32, UNE_F64, UNE_F128, + UNE_PPCF128, OGE_F32, OGE_F64, OGE_F128, + OGE_PPCF128, OLT_F32, OLT_F64, OLT_F128, + OLT_PPCF128, OLE_F32, OLE_F64, OLE_F128, + OLE_PPCF128, OGT_F32, OGT_F64, OGT_F128, + OGT_PPCF128, UO_F32, UO_F64, UO_F128, + UO_PPCF128, O_F32, O_F64, O_F128, + O_PPCF128, // MEMORY 
MEMCPY, @@ -326,7 +336,11 @@ namespace RTLIB { // EXCEPTION HANDLING UNWIND_RESUME, - // Family ATOMICs + // Note: there's two sets of atomics libcalls; see + // <http://llvm.org/docs/Atomics.html> for more info on the + // difference between them. + + // Atomic '__sync_*' libcalls. SYNC_VAL_COMPARE_AND_SWAP_1, SYNC_VAL_COMPARE_AND_SWAP_2, SYNC_VAL_COMPARE_AND_SWAP_4, @@ -388,9 +402,77 @@ namespace RTLIB { SYNC_FETCH_AND_UMIN_8, SYNC_FETCH_AND_UMIN_16, + // Atomic '__atomic_*' libcalls. + ATOMIC_LOAD, + ATOMIC_LOAD_1, + ATOMIC_LOAD_2, + ATOMIC_LOAD_4, + ATOMIC_LOAD_8, + ATOMIC_LOAD_16, + + ATOMIC_STORE, + ATOMIC_STORE_1, + ATOMIC_STORE_2, + ATOMIC_STORE_4, + ATOMIC_STORE_8, + ATOMIC_STORE_16, + + ATOMIC_EXCHANGE, + ATOMIC_EXCHANGE_1, + ATOMIC_EXCHANGE_2, + ATOMIC_EXCHANGE_4, + ATOMIC_EXCHANGE_8, + ATOMIC_EXCHANGE_16, + + ATOMIC_COMPARE_EXCHANGE, + ATOMIC_COMPARE_EXCHANGE_1, + ATOMIC_COMPARE_EXCHANGE_2, + ATOMIC_COMPARE_EXCHANGE_4, + ATOMIC_COMPARE_EXCHANGE_8, + ATOMIC_COMPARE_EXCHANGE_16, + + ATOMIC_FETCH_ADD_1, + ATOMIC_FETCH_ADD_2, + ATOMIC_FETCH_ADD_4, + ATOMIC_FETCH_ADD_8, + ATOMIC_FETCH_ADD_16, + + ATOMIC_FETCH_SUB_1, + ATOMIC_FETCH_SUB_2, + ATOMIC_FETCH_SUB_4, + ATOMIC_FETCH_SUB_8, + ATOMIC_FETCH_SUB_16, + + ATOMIC_FETCH_AND_1, + ATOMIC_FETCH_AND_2, + ATOMIC_FETCH_AND_4, + ATOMIC_FETCH_AND_8, + ATOMIC_FETCH_AND_16, + + ATOMIC_FETCH_OR_1, + ATOMIC_FETCH_OR_2, + ATOMIC_FETCH_OR_4, + ATOMIC_FETCH_OR_8, + ATOMIC_FETCH_OR_16, + + ATOMIC_FETCH_XOR_1, + ATOMIC_FETCH_XOR_2, + ATOMIC_FETCH_XOR_4, + ATOMIC_FETCH_XOR_8, + ATOMIC_FETCH_XOR_16, + + ATOMIC_FETCH_NAND_1, + ATOMIC_FETCH_NAND_2, + ATOMIC_FETCH_NAND_4, + ATOMIC_FETCH_NAND_8, + ATOMIC_FETCH_NAND_16, + // Stack Protector Fail. STACKPROTECTOR_CHECK_FAIL, + // Deoptimization. + DEOPTIMIZE, + UNKNOWN_LIBCALL }; @@ -420,7 +502,7 @@ namespace RTLIB { /// Return the SYNC_FETCH_AND_* value for the given opcode and type, or /// UNKNOWN_LIBCALL if there is none. 
- Libcall getATOMIC(unsigned Opc, MVT VT); + Libcall getSYNC(unsigned Opc, MVT VT); } } diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index bda9dbd51fff7..6469cabd3de1b 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -396,6 +396,17 @@ namespace llvm { /// specified node. bool addPred(const SDep &D, bool Required = true); + /// addPredBarrier - This adds a barrier edge to SU by calling + /// addPred(), with latency 0 generally or latency 1 for a store + /// followed by a load. + bool addPredBarrier(SUnit *SU) { + SDep Dep(SU, SDep::Barrier); + unsigned TrueMemOrderLatency = + ((SU->getInstr()->mayStore() && this->getInstr()->mayLoad()) ? 1 : 0); + Dep.setLatency(TrueMemOrderLatency); + return addPred(Dep); + } + /// removePred - This removes the specified edge as a pred of the current /// node if it exists. It also removes the current node as a successor of /// the specified node. diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index c574df0949119..12124ecc4b3e3 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -15,12 +15,14 @@ #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SparseMultiSet.h" #include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" +#include <list> namespace llvm { class MachineFrameInfo; @@ -84,6 +86,15 @@ namespace llvm { typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor> VReg2SUnitOperIdxMultiMap; + typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType; + struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> { + UnderlyingObject(ValueType V, bool MayAlias) + : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {} 
+ ValueType getValue() const { return getPointer(); } + bool mayAlias() const { return getInt(); } + }; + typedef SmallVector<UnderlyingObject, 4> UnderlyingObjectsVector; + /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of /// MachineInstrs. class ScheduleDAGInstrs : public ScheduleDAG { @@ -149,10 +160,66 @@ namespace llvm { /// Tracks the last instructions in this region using each virtual register. VReg2SUnitOperIdxMultiMap CurrentVRegUses; - /// PendingLoads - Remember where unknown loads are after the most recent - /// unknown store, as we iterate. As with Defs and Uses, this is here - /// to minimize construction/destruction. - std::vector<SUnit *> PendingLoads; + AliasAnalysis *AAForDep; + + /// Remember a generic side-effecting instruction as we proceed. + /// No other SU ever gets scheduled around it (except in the special + /// case of a huge region that gets reduced). + SUnit *BarrierChain; + + public: + + /// A list of SUnits, used in Value2SUsMap, during DAG construction. + /// Note: to gain speed it might be worth investigating an optimized + /// implementation of this data structure, such as a singly linked list + /// with a memory pool (SmallVector was tried but slow and SparseSet is not + /// applicable). + typedef std::list<SUnit *> SUList; + protected: + /// A map from ValueType to SUList, used during DAG construction, + /// as a means of remembering which SUs depend on which memory + /// locations. + class Value2SUsMap; + + /// Remove in FIFO order some SUs from huge maps. + void reduceHugeMemNodeMaps(Value2SUsMap &stores, + Value2SUsMap &loads, unsigned N); + + /// Add a chain edge between SUa and SUb, but only if both AliasAnalysis + /// and Target fail to deny the dependency. + void addChainDependency(SUnit *SUa, SUnit *SUb, + unsigned Latency = 0); + + /// Add dependencies as needed from all SUs in list to SU. 
+ void addChainDependencies(SUnit *SU, SUList &sus, unsigned Latency) { + for (auto *su : sus) + addChainDependency(SU, su, Latency); + } + + /// Add dependencies as needed from all SUs in map, to SU. + void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap); + + /// Add dependencies as needed to SU, from all SUs mapped to V. + void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap, + ValueType V); + + /// Add barrier chain edges from all SUs in map, and then clear + /// the map. This is equivalent to insertBarrierChain(), but + /// optimized for the common case where the new BarrierChain (a + /// global memory object) has a higher NodeNum than all SUs in + /// map. It is assumed BarrierChain has been set before calling + /// this. + void addBarrierChain(Value2SUsMap &map); + + /// Insert a barrier chain in a huge region, far below current + /// SU. Add barrier chain edges from all SUs in map with higher + /// NodeNums than this new BarrierChain, and remove them from + /// map. It is assumed BarrierChain has been set before calling + /// this. + void insertBarrierChain(Value2SUsMap &map); + + /// For an unanalyzable memory access, this Value is used in maps. + UndefValue *UnknownValue; /// DbgValues - Remember instruction that precedes DBG_VALUE. 
/// These are generated by buildSchedGraph but persist so they can be @@ -214,6 +281,7 @@ namespace llvm { void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = nullptr, PressureDiffs *PDiffs = nullptr, + LiveIntervals *LIS = nullptr, bool TrackLaneMasks = false); /// addSchedBarrierDeps - Add dependencies from instructions in the current diff --git a/include/llvm/CodeGen/ScheduleDAGMutation.h b/include/llvm/CodeGen/ScheduleDAGMutation.h new file mode 100644 index 0000000000000..02fe2294815c4 --- /dev/null +++ b/include/llvm/CodeGen/ScheduleDAGMutation.h @@ -0,0 +1,31 @@ +//==- ScheduleDAGMutation.h - MachineInstr Scheduling ------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGMutation class, which represents +// a target-specific mutation of the dependency graph for scheduling. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SCHEDULEDAGMUTATION_H +#define LLVM_CODEGEN_SCHEDULEDAGMUTATION_H + +namespace llvm { + class ScheduleDAGInstrs; + + /// Mutate the DAG as a postpass after normal DAG building. 
+ class ScheduleDAGMutation { + virtual void anchor(); + public: + virtual ~ScheduleDAGMutation() {} + + virtual void apply(ScheduleDAGInstrs *DAG) = 0; + }; +} + +#endif diff --git a/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/include/llvm/CodeGen/ScheduleHazardRecognizer.h index 8a40e7212ff6d..214be2794ba37 100644 --- a/include/llvm/CodeGen/ScheduleHazardRecognizer.h +++ b/include/llvm/CodeGen/ScheduleHazardRecognizer.h @@ -17,6 +17,7 @@ namespace llvm { +class MachineInstr; class SUnit; /// HazardRecognizer - This determines whether or not an instruction can be @@ -70,6 +71,10 @@ public: /// emitted, to advance the hazard state. virtual void EmitInstruction(SUnit *) {} + /// This overload will be used when the hazard recognizer is being used + /// by a non-scheduling pass, which does not use SUnits. + virtual void EmitInstruction(MachineInstr *) {} + /// PreEmitNoops - This callback is invoked prior to emitting an instruction. /// It should return the number of noops to emit prior to the provided /// instruction. @@ -79,6 +84,12 @@ public: return 0; } + /// This overload will be used when the hazard recognizer is being used + /// by a non-scheduling pass, which does not use SUnits. + virtual unsigned PreEmitNoops(MachineInstr *) { + return 0; + } + /// ShouldPreferAnother - This callback may be invoked if getHazardType /// returns NoHazard. If, even though there is no hazard, it would be better to /// schedule another available instruction, this callback should return true. diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h index ab14c2de32b0b..e0c30fe4d82a6 100644 --- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h +++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h @@ -83,11 +83,9 @@ class ScoreboardHazardRecognizer : public ScheduleHazardRecognizer { void dump() const; }; -#ifndef NDEBUG // Support for tracing ScoreboardHazardRecognizer as a component within - // another module. 
Follows the current thread-unsafe model of tracing. - static const char *DebugType; -#endif + // another module. + const char *DebugType; // Itinerary data for the target. const InstrItineraryData *ItinData; diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index a21e9ae881a75..29cce873c2f3c 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -23,6 +23,7 @@ #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/ArrayRecycler.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Target/TargetMachine.h" #include <cassert> @@ -37,7 +38,7 @@ class MachineFunction; class MDNode; class SDDbgValue; class TargetLowering; -class TargetSelectionDAGInfo; +class SelectionDAGTargetInfo; class SDVTListNode : public FoldingSetNode { friend struct FoldingSetTrait<SDVTListNode>; @@ -178,7 +179,7 @@ void checkForCycles(const SelectionDAG *DAG, bool force = false); /// class SelectionDAG { const TargetMachine &TM; - const TargetSelectionDAGInfo *TSI; + const SelectionDAGTargetInfo *TSI; const TargetLowering *TLI; MachineFunction *MF; LLVMContext *Context; @@ -208,6 +209,7 @@ class SelectionDAG { /// Pool allocation for machine-opcode SDNode operands. BumpPtrAllocator OperandAllocator; + ArrayRecycler<SDUse> OperandRecycler; /// Pool allocation for misc. objects that are created once per SelectionDAG. 
BumpPtrAllocator Allocator; @@ -247,6 +249,14 @@ public: virtual void NodeUpdated(SDNode *N); }; + struct DAGNodeDeletedListener : public DAGUpdateListener { + std::function<void(SDNode *, SDNode *)> Callback; + DAGNodeDeletedListener(SelectionDAG &DAG, + std::function<void(SDNode *, SDNode *)> Callback) + : DAGUpdateListener(DAG), Callback(Callback) {} + void NodeDeleted(SDNode *N, SDNode *E) override { Callback(N, E); } + }; + /// When true, additional steps are taken to /// ensure that getConstant() and similar functions return DAG nodes that /// have legal types. This is important after type legalization since @@ -268,6 +278,36 @@ private: DenseSet<SDNode *> &visited, int level, bool &printed); + template <typename SDNodeT, typename... ArgTypes> + SDNodeT *newSDNode(ArgTypes &&... Args) { + return new (NodeAllocator.template Allocate<SDNodeT>()) + SDNodeT(std::forward<ArgTypes>(Args)...); + } + + void createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { + assert(!Node->OperandList && "Node already has operands"); + SDUse *Ops = OperandRecycler.allocate( + ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); + + for (unsigned I = 0; I != Vals.size(); ++I) { + Ops[I].setUser(Node); + Ops[I].setInitial(Vals[I]); + } + Node->NumOperands = Vals.size(); + Node->OperandList = Ops; + checkForCycles(Node); + } + + void removeOperands(SDNode *Node) { + if (!Node->OperandList) + return; + OperandRecycler.deallocate( + ArrayRecycler<SDUse>::Capacity::get(Node->NumOperands), + Node->OperandList); + Node->NumOperands = 0; + Node->OperandList = nullptr; + } + void operator=(const SelectionDAG&) = delete; SelectionDAG(const SelectionDAG&) = delete; @@ -287,7 +327,7 @@ public: const TargetMachine &getTarget() const { return TM; } const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); } const TargetLowering &getTargetLoweringInfo() const { return *TLI; } - const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return *TSI; } + const 
SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; } LLVMContext *getContext() const {return Context; } /// Pop up a GraphViz/gv window with the DAG rendered using 'dot'. @@ -427,45 +467,64 @@ public: //===--------------------------------------------------------------------===// // Node creation methods. // - SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget = false, - bool isOpaque = false); - SDValue getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isTarget = false, - bool isOpaque = false); - SDValue getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, + + /// \brief Create a ConstantSDNode wrapping a constant value. + /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR. + /// + /// If only legal types can be produced, this does the necessary + /// transformations (e.g., if the vector element type is illegal). + /// @{ + SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, + bool isTarget = false, bool isOpaque = false); + SDValue getConstant(const APInt &Val, const SDLoc &DL, EVT VT, bool isTarget = false, bool isOpaque = false); - SDValue getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget = false); - SDValue getTargetConstant(uint64_t Val, SDLoc DL, EVT VT, + SDValue getConstant(const ConstantInt &Val, const SDLoc &DL, EVT VT, + bool isTarget = false, bool isOpaque = false); + SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, + bool isTarget = false); + SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque = false) { return getConstant(Val, DL, VT, true, isOpaque); } - SDValue getTargetConstant(const APInt &Val, SDLoc DL, EVT VT, + SDValue getTargetConstant(const APInt &Val, const SDLoc &DL, EVT VT, bool isOpaque = false) { return getConstant(Val, DL, VT, true, isOpaque); } - SDValue getTargetConstant(const ConstantInt &Val, SDLoc DL, EVT VT, + SDValue getTargetConstant(const ConstantInt &Val, const SDLoc &DL, EVT VT, bool isOpaque = false) { return 
getConstant(Val, DL, VT, true, isOpaque); } - // The forms below that take a double should only be used for simple - // constants that can be exactly represented in VT. No checks are made. - SDValue getConstantFP(double Val, SDLoc DL, EVT VT, bool isTarget = false); - SDValue getConstantFP(const APFloat& Val, SDLoc DL, EVT VT, + /// @} + + /// \brief Create a ConstantFPSDNode wrapping a constant value. + /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR. + /// + /// If only legal types can be produced, this does the necessary + /// transformations (e.g., if the vector element type is illegal). + /// The forms that take a double should only be used for simple constants + /// that can be exactly represented in VT. No checks are made. + /// @{ + SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget = false); - SDValue getConstantFP(const ConstantFP &CF, SDLoc DL, EVT VT, + SDValue getConstantFP(const APFloat &Val, const SDLoc &DL, EVT VT, bool isTarget = false); - SDValue getTargetConstantFP(double Val, SDLoc DL, EVT VT) { + SDValue getConstantFP(const ConstantFP &CF, const SDLoc &DL, EVT VT, + bool isTarget = false); + SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT) { return getConstantFP(Val, DL, VT, true); } - SDValue getTargetConstantFP(const APFloat& Val, SDLoc DL, EVT VT) { + SDValue getTargetConstantFP(const APFloat &Val, const SDLoc &DL, EVT VT) { return getConstantFP(Val, DL, VT, true); } - SDValue getTargetConstantFP(const ConstantFP &Val, SDLoc DL, EVT VT) { + SDValue getTargetConstantFP(const ConstantFP &Val, const SDLoc &DL, EVT VT) { return getConstantFP(Val, DL, VT, true); } - SDValue getGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, + /// @} + + SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset = 0, bool isTargetGA = false, unsigned char TargetFlags = 0); - SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, + SDValue 
getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset = 0, unsigned char TargetFlags = 0) { return getGlobalAddress(GV, DL, VT, offset, true, TargetFlags); @@ -502,7 +561,7 @@ public: SDValue getBasicBlock(MachineBasicBlock *MBB); SDValue getBasicBlock(MachineBasicBlock *MBB, SDLoc dl); SDValue getExternalSymbol(const char *Sym, EVT VT); - SDValue getExternalSymbol(const char *Sym, SDLoc dl, EVT VT); + SDValue getExternalSymbol(const char *Sym, const SDLoc &dl, EVT VT); SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags = 0); SDValue getMCSymbol(MCSymbol *Sym, EVT VT); @@ -510,7 +569,7 @@ public: SDValue getValueType(EVT); SDValue getRegister(unsigned Reg, EVT VT); SDValue getRegisterMask(const uint32_t *RegMask); - SDValue getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label); + SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label); SDValue getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset = 0, bool isTarget = false, unsigned char TargetFlags = 0); @@ -520,7 +579,8 @@ public: return getBlockAddress(BA, VT, Offset, true, TargetFlags); } - SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N) { + SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, + SDValue N) { return getNode(ISD::CopyToReg, dl, MVT::Other, Chain, getRegister(Reg, N.getValueType()), N); } @@ -528,7 +588,7 @@ public: // This version of the getCopyToReg method takes an extra operand, which // indicates that there is potentially an incoming glue value (if Glue is not // null) and that there should be a glue result. 
- SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N, + SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N, SDValue Glue) { SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue }; @@ -537,15 +597,15 @@ public: } // Similar to last getCopyToReg() except parameter Reg is a SDValue - SDValue getCopyToReg(SDValue Chain, SDLoc dl, SDValue Reg, SDValue N, - SDValue Glue) { + SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, SDValue Reg, SDValue N, + SDValue Glue) { SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Reg, N, Glue }; return getNode(ISD::CopyToReg, dl, VTs, makeArrayRef(Ops, Glue.getNode() ? 4 : 3)); } - SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT) { + SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT) { SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, getRegister(Reg, VT) }; return getNode(ISD::CopyFromReg, dl, VTs, Ops); @@ -554,8 +614,8 @@ public: // This version of the getCopyFromReg method takes an extra operand, which // indicates that there is potentially an incoming glue value (if Glue is not // null) and that there should be a glue result. - SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT, - SDValue Glue) { + SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT, + SDValue Glue) { SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue }; return getNode(ISD::CopyFromReg, dl, VTs, @@ -566,20 +626,46 @@ public: /// Returns the ConvertRndSat Note: Avoid using this node because it may /// disappear in the future and most targets don't support it. 
- SDValue getConvertRndSat(EVT VT, SDLoc dl, SDValue Val, SDValue DTy, - SDValue STy, - SDValue Rnd, SDValue Sat, ISD::CvtCode Code); + SDValue getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val, SDValue DTy, + SDValue STy, SDValue Rnd, SDValue Sat, + ISD::CvtCode Code); /// Return an ISD::VECTOR_SHUFFLE node. The number of elements in VT, /// which must be a vector type, must match the number of mask elements /// NumElts. An integer mask element equal to -1 is treated as undefined. - SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, - const int *MaskElts); - SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, - ArrayRef<int> MaskElts) { - assert(VT.getVectorNumElements() == MaskElts.size() && - "Must have the same number of vector elements as mask elements!"); - return getVectorShuffle(VT, dl, N1, N2, MaskElts.data()); + SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, + ArrayRef<int> Mask); + + /// Return an ISD::BUILD_VECTOR node. The number of elements in VT, + /// which must be a vector type, must match the number of operands in Ops. + /// The operands must have the same type as (or, for integers, a type wider + /// than) VT's element type. + SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef<SDValue> Ops) { + // VerifySDNode (via InsertNode) checks BUILD_VECTOR later. + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + + /// Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all + /// elements. VT must be a vector type. Op's type must be the same as (or, + /// for integers, a type wider than) VT's element type. + SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op) { + // VerifySDNode (via InsertNode) checks BUILD_VECTOR later. 
+ if (Op.getOpcode() == ISD::UNDEF) { + assert((VT.getVectorElementType() == Op.getValueType() || + (VT.isInteger() && + VT.getVectorElementType().bitsLE(Op.getValueType()))) && + "A splatted value must have a width equal or (for integers) " + "greater than the vector element type!"); + return getNode(ISD::UNDEF, SDLoc(), VT); + } + + SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Op); + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + + /// Return a splat ISD::BUILD_VECTOR node, but with Op's SDLoc. + SDValue getSplatBuildVector(EVT VT, SDValue Op) { + return getSplatBuildVector(VT, SDLoc(Op), Op); } /// \brief Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to @@ -590,52 +676,52 @@ public: /// Convert Op, which must be of integer type, to the /// integer type VT, by either any-extending or truncating it. - SDValue getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT); + SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT); /// Convert Op, which must be of integer type, to the /// integer type VT, by either sign-extending or truncating it. - SDValue getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT); + SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT); /// Convert Op, which must be of integer type, to the /// integer type VT, by either zero-extending or truncating it. - SDValue getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT); + SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT); /// Return the expression required to zero extend the Op /// value assuming it was the smaller SrcTy value. - SDValue getZeroExtendInReg(SDValue Op, SDLoc DL, EVT SrcTy); + SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT SrcTy); /// Return an operation which will any-extend the low lanes of the operand /// into the specified vector type. For example, /// this can convert a v16i8 into a v4i32 by any-extending the low four /// lanes of the operand from i8 to i32. 
- SDValue getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT); + SDValue getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT); /// Return an operation which will sign extend the low lanes of the operand /// into the specified vector type. For example, /// this can convert a v16i8 into a v4i32 by sign extending the low four /// lanes of the operand from i8 to i32. - SDValue getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT); + SDValue getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT); /// Return an operation which will zero extend the low lanes of the operand /// into the specified vector type. For example, /// this can convert a v16i8 into a v4i32 by zero extending the low four /// lanes of the operand from i8 to i32. - SDValue getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT); + SDValue getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT); /// Convert Op, which must be of integer type, to the integer type VT, /// by using an extension appropriate for the target's /// BooleanContent for type OpVT or truncating it. - SDValue getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, EVT OpVT); + SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT); /// Create a bitwise NOT operation as (XOR Val, -1). - SDValue getNOT(SDLoc DL, SDValue Val, EVT VT); + SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT); /// \brief Create a logical NOT operation as (XOR Val, BooleanOne). - SDValue getLogicalNOT(SDLoc DL, SDValue Val, EVT VT); + SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT); /// Return a new CALLSEQ_START node, which always must have a glue result /// (to ensure it's not CSE'd). CALLSEQ_START does not have a useful SDLoc. 
- SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, SDLoc DL) { + SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, const SDLoc &DL) { SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Op }; return getNode(ISD::CALLSEQ_START, DL, VTs, Ops); @@ -645,7 +731,7 @@ public: /// glue result (to ensure it's not CSE'd). /// CALLSEQ_END does not have a useful SDLoc. SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, - SDValue InGlue, SDLoc DL) { + SDValue InGlue, const SDLoc &DL) { SDVTList NodeTys = getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 4> Ops; Ops.push_back(Chain); @@ -668,38 +754,38 @@ public: /// Gets or creates the specified node. /// - SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDUse> Ops); - SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, const SDNodeFlags *Flags = nullptr); - SDValue getNode(unsigned Opcode, SDLoc DL, ArrayRef<EVT> ResultTys, + SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops); - SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, ArrayRef<SDValue> Ops); // Specialize based on number of operands. 
- SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT); - SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N); - SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, - const SDNodeFlags *Flags = nullptr); - SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, - SDValue N3); - SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, - SDValue N3, SDValue N4); - SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, - SDValue N3, SDValue N4, SDValue N5); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, const SDNodeFlags *Flags = nullptr); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3, SDValue N4); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3, SDValue N4, SDValue N5); // Specialize again based on number of operands for nodes with a VTList // rather than a single VT. 
- SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs); - SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue N); - SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs); + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N); + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, SDValue N2); - SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, SDValue N2, SDValue N3); - SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, SDValue N2, SDValue N3, SDValue N4); - SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5); /// Compute a TokenFactor to force all the incoming stack arguments to be @@ -707,24 +793,24 @@ public: /// stack arguments from being clobbered. 
SDValue getStackArgumentTokenFactor(SDValue Chain); - SDValue getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, + SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo); - SDValue getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, + SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo); - SDValue getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, + SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo); /// Helper function to make it easier to build SetCC's if you just /// have an ISD::CondCode instead of an SDValue. /// - SDValue getSetCC(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, + SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond) { assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() && "Cannot compare scalars to vectors"); @@ -737,8 +823,8 @@ public: /// Helper function to make it easier to build Select's if you just /// have operands and don't want to check for vector. - SDValue getSelect(SDLoc DL, EVT VT, SDValue Cond, - SDValue LHS, SDValue RHS) { + SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, + SDValue RHS) { assert(LHS.getValueType() == RHS.getValueType() && "Cannot use select on differing types"); assert(VT.isVector() == LHS.getValueType().isVector() && @@ -750,139 +836,145 @@ public: /// Helper function to make it easier to build SelectCC's if you /// just have an ISD::CondCode instead of an SDValue. 
/// - SDValue getSelectCC(SDLoc DL, SDValue LHS, SDValue RHS, - SDValue True, SDValue False, ISD::CondCode Cond) { + SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, + SDValue False, ISD::CondCode Cond) { return getNode(ISD::SELECT_CC, DL, True.getValueType(), LHS, RHS, True, False, getCondCode(Cond)); } /// VAArg produces a result and token chain, and takes a pointer /// and a source value as input. - SDValue getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, + SDValue getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue SV, unsigned Align); /// Gets a node for an atomic cmpxchg op. There are two /// valid Opcodes. ISD::ATOMIC_CMO_SWAP produces the value loaded and a /// chain result. ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS produces the value loaded, /// a success flag (initially i1), and a chain. - SDValue getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, - SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, - MachinePointerInfo PtrInfo, unsigned Alignment, - AtomicOrdering SuccessOrdering, + SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDVTList VTs, SDValue Chain, SDValue Ptr, + SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, + unsigned Alignment, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope); - SDValue getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, - SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, - MachineMemOperand *MMO, + SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDVTList VTs, SDValue Chain, SDValue Ptr, + SDValue Cmp, SDValue Swp, MachineMemOperand *MMO, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope); /// Gets a node for an atomic op, produces result (if relevant) /// and chain and takes 2 operands. 
- SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, + SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, SynchronizationScope SynchScope); - SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, + SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO, - AtomicOrdering Ordering, - SynchronizationScope SynchScope); + AtomicOrdering Ordering, SynchronizationScope SynchScope); /// Gets a node for an atomic op, produces result and chain and /// takes 1 operand. - SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT, + SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO, - AtomicOrdering Ordering, - SynchronizationScope SynchScope); + AtomicOrdering Ordering, SynchronizationScope SynchScope); /// Gets a node for an atomic op, produces result and chain and takes N /// operands. - SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTList, - ArrayRef<SDValue> Ops, MachineMemOperand *MMO, - AtomicOrdering SuccessOrdering, + SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDVTList VTList, ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope); - SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTList, - ArrayRef<SDValue> Ops, MachineMemOperand *MMO, - AtomicOrdering Ordering, SynchronizationScope SynchScope); + SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDVTList VTList, ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, AtomicOrdering Ordering, + SynchronizationScope SynchScope); /// Creates a MemIntrinsicNode that may produce a /// result and takes a list of operands. 
Opcode may be INTRINSIC_VOID, /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not /// less than FIRST_TARGET_MEMORY_OPCODE. - SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - ArrayRef<SDValue> Ops, - EVT MemVT, MachinePointerInfo PtrInfo, - unsigned Align = 0, bool Vol = false, - bool ReadMem = true, bool WriteMem = true, - unsigned Size = 0); + SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, + ArrayRef<SDValue> Ops, EVT MemVT, + MachinePointerInfo PtrInfo, unsigned Align = 0, + bool Vol = false, bool ReadMem = true, + bool WriteMem = true, unsigned Size = 0); - SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - ArrayRef<SDValue> Ops, - EVT MemVT, MachineMemOperand *MMO); + SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, + ArrayRef<SDValue> Ops, EVT MemVT, + MachineMemOperand *MMO); /// Create a MERGE_VALUES node from the given operands. - SDValue getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl); + SDValue getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl); /// Loads are not normal binary operators: their result type is not /// determined by their operands, and they produce a value AND a token chain. /// - SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, - MachinePointerInfo PtrInfo, bool isVolatile, - bool isNonTemporal, bool isInvariant, unsigned Alignment, + /// This function will set the MOLoad flag on MMOFlags, but you can set it if + /// you want. The MOStore flag must not be set. 
+ SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, + MachinePointerInfo PtrInfo, unsigned Alignment = 0, + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); - SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, + SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO); - SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, - SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, - EVT MemVT, bool isVolatile, - bool isNonTemporal, bool isInvariant, unsigned Alignment, - const AAMDNodes &AAInfo = AAMDNodes()); - SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, + SDValue + getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, + SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, + unsigned Alignment = 0, + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes()); + SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO); - SDValue getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, + SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); - SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, SDLoc dl, - SDValue Chain, SDValue Ptr, SDValue Offset, - MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, bool isInvariant, - unsigned Alignment, const AAMDNodes &AAInfo = AAMDNodes(), + SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, + MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment = 0, + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); - SDValue 
getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, SDLoc dl, - SDValue Chain, SDValue Ptr, SDValue Offset, + SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO); /// Helper function to build ISD::STORE nodes. - SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, bool isVolatile, - bool isNonTemporal, unsigned Alignment, - const AAMDNodes &AAInfo = AAMDNodes()); - SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, + /// + /// This function will set the MOStore flag on MMOFlags, but you can set it if + /// you want. The MOLoad and MOInvariant flags must not be set. + SDValue + getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, + MachinePointerInfo PtrInfo, unsigned Alignment = 0, + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes()); + SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO); - SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, EVT TVT, - bool isNonTemporal, bool isVolatile, - unsigned Alignment, - const AAMDNodes &AAInfo = AAMDNodes()); - SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, - EVT TVT, MachineMemOperand *MMO); - SDValue getIndexedStore(SDValue OrigStoe, SDLoc dl, SDValue Base, - SDValue Offset, ISD::MemIndexedMode AM); - - SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, + SDValue + getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, + MachinePointerInfo PtrInfo, EVT TVT, unsigned Alignment = 0, + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes()); + SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, EVT TVT, MachineMemOperand *MMO); + 
SDValue getIndexedStore(SDValue OrigStoe, const SDLoc &dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM); + + /// Returns sum of the base pointer and offset. + SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL); + + SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::LoadExtType); - SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, bool IsTrunc); - SDValue getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, + SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO); - SDValue getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, + SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO); /// Construct a node to track a Value* through the backend. SDValue getSrcValue(const Value *v); @@ -895,8 +987,8 @@ public: SDValue getBitcast(EVT VT, SDValue V); /// Return an AddrSpaceCastSDNode. - SDValue getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, - unsigned SrcAS, unsigned DestAS); + SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, + unsigned DestAS); /// Return the specified value casted to /// the target's desired shift amount type. @@ -965,45 +1057,46 @@ public: /// Note that getMachineNode returns the resultant node. If there is already /// a node of the specified opcode and operands, it returns that node instead /// of the current one. 
- MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - SDValue Op1); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - SDValue Op1, SDValue Op2); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - SDValue Op1, SDValue Op2, SDValue Op3); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - ArrayRef<SDValue> Ops); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, SDValue Op1); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, SDValue Op1, SDValue Op2); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, ArrayRef<SDValue> Ops); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, - EVT VT3, SDValue Op1, SDValue Op2); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, - EVT VT3, SDValue Op1, SDValue Op2, + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, SDValue Op1); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3); + MachineSDNode 
*getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, ArrayRef<SDValue> Ops); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, EVT VT3, SDValue Op1, SDValue Op2); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, - EVT VT3, ArrayRef<SDValue> Ops); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, - EVT VT3, EVT VT4, ArrayRef<SDValue> Ops); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, - ArrayRef<EVT> ResultTys, + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, EVT VT3, ArrayRef<SDValue> Ops); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, ArrayRef<SDValue> Ops); - MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, SDVTList VTs, + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, + ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, SDVTList VTs, ArrayRef<SDValue> Ops); /// A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes. - SDValue getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, + SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand); /// A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes. - SDValue getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, + SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg); /// Get the specified node if it's already available, or else return NULL. @@ -1012,16 +1105,17 @@ public: /// Creates a SDDbgValue node. 
SDDbgValue *getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, - bool IsIndirect, uint64_t Off, DebugLoc DL, + bool IsIndirect, uint64_t Off, const DebugLoc &DL, unsigned O); /// Constant SDDbgValue *getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, - uint64_t Off, DebugLoc DL, unsigned O); + uint64_t Off, const DebugLoc &DL, unsigned O); /// FrameIndex SDDbgValue *getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, - uint64_t Off, DebugLoc DL, unsigned O); + uint64_t Off, const DebugLoc &DL, + unsigned O); /// Remove the specified node from the system. If any of its /// operands then becomes dead, remove them as well. Inform UpdateListener @@ -1129,9 +1223,11 @@ public: return DbgInfo->getSDDbgValues(SD); } - /// Transfer SDDbgValues. +private: + /// Transfer SDDbgValues. Called via ReplaceAllUses{OfValue}?With void TransferDbgValues(SDValue From, SDValue To); +public: /// Return true if there are any SDDbgValue nodes associated /// with this SelectionDAG. bool hasDebugValues() const { return !DbgInfo->empty(); } @@ -1147,29 +1243,32 @@ public: void dump() const; - /// Create a stack temporary, suitable for holding the - /// specified value type. If minAlign is specified, the slot size will have - /// at least that alignment. + /// Create a stack temporary, suitable for holding the specified value type. + /// If minAlign is specified, the slot size will have at least that alignment. SDValue CreateStackTemporary(EVT VT, unsigned minAlign = 1); - /// Create a stack temporary suitable for holding - /// either of the specified value types. + /// Create a stack temporary suitable for holding either of the specified + /// value types. 
SDValue CreateStackTemporary(EVT VT1, EVT VT2); - SDValue FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, + SDValue FoldSymbolOffset(unsigned Opcode, EVT VT, + const GlobalAddressSDNode *GA, + const SDNode *N2); + + SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *Cst1, SDNode *Cst2); - SDValue FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, + SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, const ConstantSDNode *Cst1, const ConstantSDNode *Cst2); - SDValue FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, - EVT VT, ArrayRef<SDValue> Ops, + SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, + ArrayRef<SDValue> Ops, const SDNodeFlags *Flags = nullptr); /// Constant fold a setcc to true or false. - SDValue FoldSetCC(EVT VT, SDValue N1, - SDValue N2, ISD::CondCode Cond, SDLoc dl); + SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, + const SDLoc &dl); /// Return true if the sign bit of Op is known to be zero. /// We use this predicate to simplify operations downstream. @@ -1188,27 +1287,32 @@ public: void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth = 0) const; - /// Return the number of times the sign bit of the - /// register is replicated into the other bits. We know that at least 1 bit - /// is always equal to the sign bit (itself), but other cases can give us - /// information. For example, immediately after an "SRA X, 2", we know that - /// the top 3 bits are all equal to each other, so we return 3. Targets can - /// implement the ComputeNumSignBitsForTarget method in the TargetLowering - /// class to allow target nodes to be understood. + /// Test if the given value is known to have exactly one bit set. This differs + /// from computeKnownBits in that it doesn't necessarily determine which bit + /// is set. 
+ bool isKnownToBeAPowerOfTwo(SDValue Val) const; + + /// Return the number of times the sign bit of the register is replicated into + /// the other bits. We know that at least 1 bit is always equal to the sign + /// bit (itself), but other cases can give us information. For example, + /// immediately after an "SRA X, 2", we know that the top 3 bits are all equal + /// to each other, so we return 3. Targets can implement the + /// ComputeNumSignBitsForTarget method in the TargetLowering class to allow + /// target nodes to be understood. unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const; - /// Return true if the specified operand is an - /// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an - /// ISD::OR with a ConstantSDNode that is guaranteed to have the same - /// semantics as an ADD. This handles the equivalence: + /// Return true if the specified operand is an ISD::ADD with a ConstantSDNode + /// on the right-hand side, or if it is an ISD::OR with a ConstantSDNode that + /// is guaranteed to have the same semantics as an ADD. This handles the + /// equivalence: /// X|Cst == X+Cst iff X&Cst = 0. bool isBaseWithConstantOffset(SDValue Op) const; /// Test whether the given SDValue is known to never be NaN. bool isKnownNeverNaN(SDValue Op) const; - /// Test whether the given SDValue is known to never be - /// positive or negative Zero. + /// Test whether the given SDValue is known to never be positive or negative + /// zero. bool isKnownNeverZero(SDValue Op) const; /// Test whether two SDValues are known to compare equal. This @@ -1228,10 +1332,12 @@ public: /// vector op and fill the end of the resulting vector with UNDEFS. SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0); - /// Return true if LD is loading 'Bytes' bytes from a location that is 'Dist' - /// units away from the location that the 'Base' load is loading from. 
- bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, - unsigned Bytes, int Dist) const; + /// Return true if loads are next to each other and can be + /// merged. Check that both are nonvolatile and if LD is loading + /// 'Bytes' bytes from a location that is 'Dist' units away from the + /// location that the 'Base' load is loading from. + bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return 0 if /// it cannot be inferred. @@ -1265,8 +1371,12 @@ public: void ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args, unsigned Start = 0, unsigned Count = 0); + /// Compute the default alignment value for the given type. unsigned getEVTAlignment(EVT MemoryVT) const; + /// Test whether the given value is a constant int or similar node. + SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N); + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); @@ -1276,16 +1386,16 @@ private: void *&InsertPos); SDNode *FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, void *&InsertPos); - SDNode *UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc loc); + SDNode *UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &loc); void DeleteNodeNotInCSEMaps(SDNode *N); void DeallocateNode(SDNode *N); void allnodes_clear(); - BinarySDNode *GetBinarySDNode(unsigned Opcode, SDLoc DL, SDVTList VTs, - SDValue N1, SDValue N2, - const SDNodeFlags *Flags = nullptr); + SDNode *GetBinarySDNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, + SDValue N1, SDValue N2, + const SDNodeFlags *Flags = nullptr); /// Look up the node specified by ID in CSEMap. If it exists, return it. If /// not, return the insertion token that will make insertion faster. This @@ -1296,7 +1406,7 @@ private: /// Look up the node specified by ID in CSEMap. If it exists, return it. If /// not, return the insertion token that will make insertion faster. 
Performs /// additional processing for constant nodes. - SDNode *FindNodeOrInsertPos(const FoldingSetNodeID &ID, DebugLoc DL, + SDNode *FindNodeOrInsertPos(const FoldingSetNodeID &ID, const SDLoc &DL, void *&InsertPos); /// List of non-single value types. diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index a011e4c338c4a..7f4549d3058f4 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -76,8 +76,8 @@ public: /// right after selection. virtual void PostprocessISelDAG() {} - /// Select - Main hook targets implement to select a node. - virtual SDNode *Select(SDNode *N) = 0; + /// Main hook for targets to transform nodes into machine nodes. + virtual void Select(SDNode *N) = 0; /// SelectInlineAsmMemoryOperand - Select the specified address as a target /// addressing mode, according to the specified constraint. If this does @@ -111,6 +111,8 @@ public: OPC_RecordMemRef, OPC_CaptureGlueInput, OPC_MoveChild, + OPC_MoveChild0, OPC_MoveChild1, OPC_MoveChild2, OPC_MoveChild3, + OPC_MoveChild4, OPC_MoveChild5, OPC_MoveChild6, OPC_MoveChild7, OPC_MoveParent, OPC_CheckSame, OPC_CheckChild0Same, OPC_CheckChild1Same, @@ -140,11 +142,15 @@ public: OPC_EmitMergeInputChains, OPC_EmitMergeInputChains1_0, OPC_EmitMergeInputChains1_1, + OPC_EmitMergeInputChains1_2, OPC_EmitCopyToReg, OPC_EmitNodeXForm, OPC_EmitNode, + // Space-optimized forms that implicitly encode number of result VTs. + OPC_EmitNode0, OPC_EmitNode1, OPC_EmitNode2, OPC_MorphNodeTo, - OPC_MarkGlueResults, + // Space-optimized forms that implicitly encode number of result VTs. + OPC_MorphNodeTo0, OPC_MorphNodeTo1, OPC_MorphNodeTo2, OPC_CompleteMatch }; @@ -196,11 +202,16 @@ protected: CurDAG->ReplaceAllUsesWith(F, T); } + /// Replace all uses of \c F with \c T, then remove \c F from the DAG. 
+ void ReplaceNode(SDNode *F, SDNode *T) { + CurDAG->ReplaceAllUsesWith(F, T); + CurDAG->RemoveDeadNode(F); + } /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated /// by tblgen. Others should not call it. - void SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL); - + void SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, + const SDLoc &DL); public: // Calls to these predicates are generated by tblgen. @@ -236,9 +247,8 @@ public: llvm_unreachable("Tblgen should generate this!"); } - SDNode *SelectCodeCommon(SDNode *NodeToMatch, - const unsigned char *MatcherTable, - unsigned TableSize); + void SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, + unsigned TableSize); /// \brief Return true if complex patterns for this target can mutate the /// DAG. @@ -249,10 +259,10 @@ public: private: // Calls to these functions are generated by tblgen. - SDNode *Select_INLINEASM(SDNode *N); - SDNode *Select_READ_REGISTER(SDNode *N); - SDNode *Select_WRITE_REGISTER(SDNode *N); - SDNode *Select_UNDEF(SDNode *N); + void Select_INLINEASM(SDNode *N); + void Select_READ_REGISTER(SDNode *N); + void Select_WRITE_REGISTER(SDNode *N); + void Select_UNDEF(SDNode *N); void CannotYetSelect(SDNode *N); private: @@ -294,11 +304,9 @@ private: /// state machines that start with a OPC_SwitchOpcode node. 
std::vector<unsigned> OpcodeOffset; - void UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, - const SmallVectorImpl<SDNode*> &ChainNodesMatched, - SDValue InputGlue, const SmallVectorImpl<SDNode*> &F, - bool isMorphNodeTo); - + void UpdateChains(SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode *> &ChainNodesMatched, + bool isMorphNodeTo); }; } diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 536fc656e8e2e..cfcc4117f93b6 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -44,7 +44,7 @@ class GlobalValue; class MachineBasicBlock; class MachineConstantPoolValue; class SDNode; -class BinaryWithFlagsSDNode; +class HandleSDNode; class Value; class MCSymbol; template <typename T> struct DenseMapInfo; @@ -66,24 +66,28 @@ struct SDVTList { namespace ISD { /// Node predicates - /// Return true if the specified node is a - /// BUILD_VECTOR where all of the elements are ~0 or undef. + /// If N is a BUILD_VECTOR node whose elements are all the same constant or + /// undefined, return true and return the constant value in \p SplatValue. + bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); + + /// Return true if the specified node is a BUILD_VECTOR where all of the + /// elements are ~0 or undef. bool isBuildVectorAllOnes(const SDNode *N); - /// Return true if the specified node is a - /// BUILD_VECTOR where all of the elements are 0 or undef. + /// Return true if the specified node is a BUILD_VECTOR where all of the + /// elements are 0 or undef. bool isBuildVectorAllZeros(const SDNode *N); - /// \brief Return true if the specified node is a BUILD_VECTOR node of - /// all ConstantSDNode or undef. + /// Return true if the specified node is a BUILD_VECTOR node of all + /// ConstantSDNode or undef. 
bool isBuildVectorOfConstantSDNodes(const SDNode *N); - /// \brief Return true if the specified node is a BUILD_VECTOR node of - /// all ConstantFPSDNode or undef. + /// Return true if the specified node is a BUILD_VECTOR node of all + /// ConstantFPSDNode or undef. bool isBuildVectorOfConstantFPSDNodes(const SDNode *N); - /// Return true if the node has at least one operand - /// and all operands of the specified node are ISD::UNDEF. + /// Return true if the node has at least one operand and all operands of the + /// specified node are ISD::UNDEF. bool allOperandsUndef(const SDNode *N); } // end llvm:ISD namespace @@ -280,6 +284,8 @@ public: private: friend class SelectionDAG; friend class SDNode; + // TODO: unfriend HandleSDNode once we fix its operand handling. + friend class HandleSDNode; void setUser(SDNode *p) { User = p; } @@ -328,6 +334,7 @@ private: bool NoInfs : 1; bool NoSignedZeros : 1; bool AllowReciprocal : 1; + bool VectorReduction : 1; public: /// Default constructor turns off all optimization flags. @@ -340,6 +347,7 @@ public: NoInfs = false; NoSignedZeros = false; AllowReciprocal = false; + VectorReduction = false; } // These are mutators for each flag. @@ -351,6 +359,7 @@ public: void setNoInfs(bool b) { NoInfs = b; } void setNoSignedZeros(bool b) { NoSignedZeros = b; } void setAllowReciprocal(bool b) { AllowReciprocal = b; } + void setVectorReduction(bool b) { VectorReduction = b; } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } @@ -361,6 +370,7 @@ public: bool hasNoInfs() const { return NoInfs; } bool hasNoSignedZeros() const { return NoSignedZeros; } bool hasAllowReciprocal() const { return AllowReciprocal; } + bool hasVectorReduction() const { return VectorReduction; } /// Return a raw encoding of the flags. /// This function should only be used to add data to the NodeID value. @@ -390,10 +400,6 @@ private: /// The operation that this node performs. 
int16_t NodeType; - /// This is true if OperandList was new[]'d. If true, - /// then they will be delete[]'d when the node is destroyed. - uint16_t OperandsNeedDelete : 1; - /// This tracks whether this node has one or more dbg_value /// nodes corresponding to it. uint16_t HasDebugValue : 1; @@ -402,7 +408,7 @@ protected: /// This member is defined by this class, but is not used for /// anything. Subclasses can use it to hold whatever state they find useful. /// This field is initialized to zero by the ctor. - uint16_t SubclassData : 14; + uint16_t SubclassData : 15; private: /// Unique id per SDNode in the DAG. @@ -436,6 +442,8 @@ private: friend class SelectionDAG; friend struct ilist_traits<SDNode>; + // TODO: unfriend HandleSDNode once we fix its operand handling. + friend class HandleSDNode; public: /// Unique and persistent id per SDNode in the DAG. @@ -621,18 +629,32 @@ public: /// NOTE: This is an expensive method. Use it carefully. bool hasPredecessor(const SDNode *N) const; - /// Return true if N is a predecessor of this node. - /// N is either an operand of this node, or can be reached by recursively - /// traversing up the operands. - /// In this helper the Visited and worklist sets are held externally to - /// cache predecessors over multiple invocations. If you want to test for - /// multiple predecessors this method is preferable to multiple calls to - /// hasPredecessor. Be sure to clear Visited and Worklist if the DAG - /// changes. - /// NOTE: This is still very expensive. Use carefully. - bool hasPredecessorHelper(const SDNode *N, - SmallPtrSetImpl<const SDNode *> &Visited, - SmallVectorImpl<const SDNode *> &Worklist) const; + /// Returns true if N is a predecessor of any node in Worklist. This + /// helper keeps Visited and Worklist sets externally to allow unions + /// searches to be performed in parallel, caching of results across + /// queries and incremental addition to Worklist. Stops early if N is + /// found but will resume. 
Remember to clear Visited and Worklists + /// if DAG changes. + static bool hasPredecessorHelper(const SDNode *N, + SmallPtrSetImpl<const SDNode *> &Visited, + SmallVectorImpl<const SDNode *> &Worklist) { + if (Visited.count(N)) + return true; + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + bool Found = false; + for (const SDValue &OpV : M->op_values()) { + SDNode *Op = OpV.getNode(); + if (Visited.insert(Op).second) + Worklist.push_back(Op); + if (Op == N) + Found = true; + } + if (Found) + return true; + } + return false; + } /// Return the number of values used by this operation. unsigned getNumOperands() const { return NumOperands; } @@ -788,101 +810,20 @@ protected: return Ret; } - SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - ArrayRef<SDValue> Ops) - : NodeType(Opc), OperandsNeedDelete(true), HasDebugValue(false), - SubclassData(0), NodeId(-1), - OperandList(Ops.size() ? new SDUse[Ops.size()] : nullptr), - ValueList(VTs.VTs), UseList(nullptr), NumOperands(Ops.size()), - NumValues(VTs.NumVTs), IROrder(Order), debugLoc(std::move(dl)) { - assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); - assert(NumOperands == Ops.size() && - "NumOperands wasn't wide enough for its operands!"); - assert(NumValues == VTs.NumVTs && - "NumValues wasn't wide enough for its operands!"); - for (unsigned i = 0; i != Ops.size(); ++i) { - assert(OperandList && "no operands available"); - OperandList[i].setUser(this); - OperandList[i].setInitial(Ops[i]); - } - checkForCycles(this); - } - - /// This constructor adds no operands itself; operands can be - /// set later with InitOperands. + /// Create an SDNode. + /// + /// SDNodes are created without any operands, and never own the operand + /// storage. To add operands, see SelectionDAG::createOperands. 
SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs) - : NodeType(Opc), OperandsNeedDelete(false), HasDebugValue(false), - SubclassData(0), NodeId(-1), OperandList(nullptr), ValueList(VTs.VTs), - UseList(nullptr), NumOperands(0), NumValues(VTs.NumVTs), - IROrder(Order), debugLoc(std::move(dl)) { + : NodeType(Opc), HasDebugValue(false), SubclassData(0), NodeId(-1), + OperandList(nullptr), ValueList(VTs.VTs), UseList(nullptr), + NumOperands(0), NumValues(VTs.NumVTs), IROrder(Order), + debugLoc(std::move(dl)) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); assert(NumValues == VTs.NumVTs && "NumValues wasn't wide enough for its operands!"); } - /// Initialize the operands list of this with 1 operand. - void InitOperands(SDUse *Ops, const SDValue &Op0) { - Ops[0].setUser(this); - Ops[0].setInitial(Op0); - NumOperands = 1; - OperandList = Ops; - checkForCycles(this); - } - - /// Initialize the operands list of this with 2 operands. - void InitOperands(SDUse *Ops, const SDValue &Op0, const SDValue &Op1) { - Ops[0].setUser(this); - Ops[0].setInitial(Op0); - Ops[1].setUser(this); - Ops[1].setInitial(Op1); - NumOperands = 2; - OperandList = Ops; - checkForCycles(this); - } - - /// Initialize the operands list of this with 3 operands. - void InitOperands(SDUse *Ops, const SDValue &Op0, const SDValue &Op1, - const SDValue &Op2) { - Ops[0].setUser(this); - Ops[0].setInitial(Op0); - Ops[1].setUser(this); - Ops[1].setInitial(Op1); - Ops[2].setUser(this); - Ops[2].setInitial(Op2); - NumOperands = 3; - OperandList = Ops; - checkForCycles(this); - } - - /// Initialize the operands list of this with 4 operands. 
- void InitOperands(SDUse *Ops, const SDValue &Op0, const SDValue &Op1, - const SDValue &Op2, const SDValue &Op3) { - Ops[0].setUser(this); - Ops[0].setInitial(Op0); - Ops[1].setUser(this); - Ops[1].setInitial(Op1); - Ops[2].setUser(this); - Ops[2].setInitial(Op2); - Ops[3].setUser(this); - Ops[3].setInitial(Op3); - NumOperands = 4; - OperandList = Ops; - checkForCycles(this); - } - - /// Initialize the operands list of this with N operands. - void InitOperands(SDUse *Ops, const SDValue *Vals, unsigned N) { - for (unsigned i = 0; i != N; ++i) { - Ops[i].setUser(this); - Ops[i].setInitial(Vals[i]); - } - NumOperands = N; - assert(NumOperands == N && - "NumOperands wasn't wide enough for its operands!"); - OperandList = Ops; - checkForCycles(this); - } - /// Release the operands and set this node to have zero operands. void DropOperands(); }; @@ -898,40 +839,20 @@ protected: /// be used by the DAGBuilder, the other to be used by others. class SDLoc { private: - // Ptr could be used for either Instruction* or SDNode*. It is used for - // Instruction* if IROrder is not -1. 
- const void *Ptr; - int IROrder; + DebugLoc DL; + int IROrder = 0; public: - SDLoc() : Ptr(nullptr), IROrder(0) {} - SDLoc(const SDNode *N) : Ptr(N), IROrder(-1) { - assert(N && "null SDNode"); - } - SDLoc(const SDValue V) : Ptr(V.getNode()), IROrder(-1) { - assert(Ptr && "null SDNode"); - } - SDLoc(const Instruction *I, int Order) : Ptr(I), IROrder(Order) { + SDLoc() = default; + SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {} + SDLoc(const SDValue V) : SDLoc(V.getNode()) {} + SDLoc(const Instruction *I, int Order) : IROrder(Order) { assert(Order >= 0 && "bad IROrder"); + if (I) + DL = I->getDebugLoc(); } - unsigned getIROrder() { - if (IROrder >= 0 || Ptr == nullptr) { - return (unsigned)IROrder; - } - const SDNode *N = (const SDNode*)(Ptr); - return N->getIROrder(); - } - DebugLoc getDebugLoc() { - if (!Ptr) { - return DebugLoc(); - } - if (IROrder >= 0) { - const Instruction *I = (const Instruction*)(Ptr); - return I->getDebugLoc(); - } - const SDNode *N = (const SDNode*)(Ptr); - return N->getDebugLoc(); - } + unsigned getIROrder() const { return IROrder; } + const DebugLoc &getDebugLoc() const { return DL; } }; @@ -1008,30 +929,6 @@ inline void SDUse::setNode(SDNode *N) { if (N) N->addUse(*this); } -/// This class is used for single-operand SDNodes. This is solely -/// to allow co-allocation of node operands with the node itself. -class UnarySDNode : public SDNode { - SDUse Op; -public: - UnarySDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - SDValue X) - : SDNode(Opc, Order, dl, VTs) { - InitOperands(&Op, X); - } -}; - -/// This class is used for two-operand SDNodes. This is solely -/// to allow co-allocation of node operands with the node itself. 
-class BinarySDNode : public SDNode { - SDUse Ops[2]; -public: - BinarySDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - SDValue X, SDValue Y) - : SDNode(Opc, Order, dl, VTs) { - InitOperands(Ops, X, Y); - } -}; - /// Returns true if the opcode is a binary operation with flags. static bool isBinOpWithFlags(unsigned Opcode) { switch (Opcode) { @@ -1056,30 +953,17 @@ static bool isBinOpWithFlags(unsigned Opcode) { /// This class is an extension of BinarySDNode /// used from those opcodes that have associated extra flags. -class BinaryWithFlagsSDNode : public BinarySDNode { +class BinaryWithFlagsSDNode : public SDNode { public: SDNodeFlags Flags; - BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - SDValue X, SDValue Y, const SDNodeFlags &NodeFlags) - : BinarySDNode(Opc, Order, dl, VTs, X, Y), Flags(NodeFlags) {} + BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, + SDVTList VTs, const SDNodeFlags &NodeFlags) + : SDNode(Opc, Order, dl, VTs), Flags(NodeFlags) {} static bool classof(const SDNode *N) { return isBinOpWithFlags(N->getOpcode()); } }; -/// This class is used for three-operand SDNodes. This is solely -/// to allow co-allocation of node operands with the node itself. -class TernarySDNode : public SDNode { - SDUse Ops[3]; -public: - TernarySDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - SDValue X, SDValue Y, SDValue Z) - : SDNode(Opc, Order, dl, VTs) { - InitOperands(Ops, X, Y, Z); - } -}; - - /// This class is used to form a handle around another node that /// is persistent and is updated across invocations of replaceAllUsesWith on its /// operand. This node should be directly created by end-users and not added to @@ -1092,19 +976,27 @@ public: // HandleSDNodes are never inserted into the DAG, so they won't be // auto-numbered. Use ID 65535 as a sentinel. PersistentId = 0xffff; - InitOperands(&Op, X); + + // Manually set up the operand list. 
This node type is special in that it's + // always stack allocated and SelectionDAG does not manage its operands. + // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not + // be so special. + Op.setUser(this); + Op.setInitial(X); + NumOperands = 1; + OperandList = &Op; } ~HandleSDNode(); const SDValue &getValue() const { return Op; } }; -class AddrSpaceCastSDNode : public UnarySDNode { +class AddrSpaceCastSDNode : public SDNode { private: unsigned SrcAddrSpace; unsigned DestAddrSpace; public: - AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT, SDValue X, + AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT, unsigned SrcAS, unsigned DestAS); unsigned getSrcAddressSpace() const { return SrcAddrSpace; } @@ -1126,12 +1018,9 @@ protected: MachineMemOperand *MMO; public: - MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, + MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO); - MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - ArrayRef<SDValue> Ops, EVT MemoryVT, MachineMemOperand *MMO); - bool readMem() const { return MMO->isLoad(); } bool writeMem() const { return MMO->isStore(); } @@ -1234,8 +1123,6 @@ public: /// This is an SDNode representing atomic operations. class AtomicSDNode : public MemSDNode { - SDUse Ops[4]; - /// For cmpxchg instructions, the ordering requirements when a store does not /// occur. AtomicOrdering FailureOrdering; @@ -1244,13 +1131,15 @@ class AtomicSDNode : public MemSDNode { AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { // This must match encodeMemSDNodeFlags() in SelectionDAG.cpp. 
- assert((SuccessOrdering & 15) == SuccessOrdering && + assert((AtomicOrdering)((unsigned)SuccessOrdering & 15) == + SuccessOrdering && "Ordering may not require more than 4 bits!"); - assert((FailureOrdering & 15) == FailureOrdering && + assert((AtomicOrdering)((unsigned)FailureOrdering & 15) == + FailureOrdering && "Ordering may not require more than 4 bits!"); assert((SynchScope & 1) == SynchScope && "SynchScope may not require more than 1 bit!"); - SubclassData |= SuccessOrdering << 8; + SubclassData |= (unsigned)SuccessOrdering << 8; SubclassData |= SynchScope << 12; this->FailureOrdering = FailureOrdering; assert(getSuccessOrdering() == SuccessOrdering && @@ -1261,50 +1150,12 @@ class AtomicSDNode : public MemSDNode { } public: - // Opc: opcode for atomic - // VTL: value type list - // Chain: memory chain for operaand - // Ptr: address to update as a SDValue - // Cmp: compare value - // Swp: swap value - // SrcVal: address to update as a Value (used for MemOperand) - // Align: alignment of memory - AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL, - EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, - MachineMemOperand *MMO, AtomicOrdering Ordering, - SynchronizationScope SynchScope) - : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) { - InitAtomic(Ordering, Ordering, SynchScope); - InitOperands(Ops, Chain, Ptr, Cmp, Swp); - } - AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL, - EVT MemVT, - SDValue Chain, SDValue Ptr, - SDValue Val, MachineMemOperand *MMO, - AtomicOrdering Ordering, SynchronizationScope SynchScope) - : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) { - InitAtomic(Ordering, Ordering, SynchScope); - InitOperands(Ops, Chain, Ptr, Val); - } - AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL, - EVT MemVT, - SDValue Chain, SDValue Ptr, - MachineMemOperand *MMO, - AtomicOrdering Ordering, SynchronizationScope SynchScope) - : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) { - 
InitAtomic(Ordering, Ordering, SynchScope); - InitOperands(Ops, Chain, Ptr); - } - AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL, EVT MemVT, - const SDValue* AllOps, SDUse *DynOps, unsigned NumOps, - MachineMemOperand *MMO, + AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL, + EVT MemVT, MachineMemOperand *MMO, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) - : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) { + : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) { InitAtomic(SuccessOrdering, FailureOrdering, SynchScope); - assert((DynOps || NumOps <= array_lengthof(Ops)) && - "Too many ops for internal storage!"); - InitOperands(DynOps ? DynOps : Ops, AllOps, NumOps); } const SDValue &getBasePtr() const { return getOperand(1); } @@ -1351,10 +1202,9 @@ public: /// with a value not less than FIRST_TARGET_MEMORY_OPCODE. class MemIntrinsicSDNode : public MemSDNode { public: - MemIntrinsicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - ArrayRef<SDValue> Ops, EVT MemoryVT, - MachineMemOperand *MMO) - : MemSDNode(Opc, Order, dl, VTs, Ops, MemoryVT, MMO) { + MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, + SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO) + : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) { SubclassData |= 1u << 13; } @@ -1377,20 +1227,15 @@ public: /// An index of -1 is treated as undef, such that the code generator may put /// any value in the corresponding element of the result. class ShuffleVectorSDNode : public SDNode { - SDUse Ops[2]; - // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and // is freed when the SelectionDAG object is destroyed. 
const int *Mask; protected: friend class SelectionDAG; - ShuffleVectorSDNode(EVT VT, unsigned Order, DebugLoc dl, SDValue N1, - SDValue N2, const int *M) - : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) { - InitOperands(Ops, N1, N2); - } -public: + ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M) + : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {} +public: ArrayRef<int> getMask() const { EVT VT = getValueType(0); return makeArrayRef(Mask, VT.getVectorNumElements()); @@ -1414,7 +1259,7 @@ public: /// Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. - static void commuteMask(SmallVectorImpl<int> &Mask) { + static void commuteMask(MutableArrayRef<int> Mask) { unsigned NumElems = Mask.size(); for (unsigned i = 0; i != NumElems; ++i) { int idx = Mask[i]; @@ -1436,9 +1281,10 @@ class ConstantSDNode : public SDNode { const ConstantInt *Value; friend class SelectionDAG; ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, - DebugLoc DL, EVT VT) - : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, - 0, DL, getSDVTList(VT)), Value(val) { + const DebugLoc &DL, EVT VT) + : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DL, + getSDVTList(VT)), + Value(val) { SubclassData |= (uint16_t)isOpaque; } public: @@ -1463,10 +1309,12 @@ public: class ConstantFPSDNode : public SDNode { const ConstantFP *Value; friend class SelectionDAG; - ConstantFPSDNode(bool isTarget, const ConstantFP *val, DebugLoc DL, EVT VT) - : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, - 0, DL, getSDVTList(VT)), Value(val) { - } + ConstantFPSDNode(bool isTarget, const ConstantFP *val, const DebugLoc &DL, + EVT VT) + : SDNode(isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP, 0, DL, + getSDVTList(VT)), + Value(val) {} + public: const APFloat& getValueAPF() const { return Value->getValueAPF(); } @@ -1517,15 +1365,19 @@ bool isNullFPConstant(SDValue V); bool isAllOnesConstant(SDValue V); /// Returns true if \p V is a constant integer one. bool isOneConstant(SDValue V); +/// Returns true if \p V is a bitwise not operation. Assumes that an all ones +/// constant is canonicalized to be operand 1. +bool isBitwiseNot(SDValue V); class GlobalAddressSDNode : public SDNode { const GlobalValue *TheGlobal; int64_t Offset; unsigned char TargetFlags; friend class SelectionDAG; - GlobalAddressSDNode(unsigned Opc, unsigned Order, DebugLoc DL, + GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, int64_t o, unsigned char TargetFlags); + public: const GlobalValue *getGlobal() const { return TheGlobal; } @@ -1821,13 +1673,11 @@ public: }; class EHLabelSDNode : public SDNode { - SDUse Chain; MCSymbol *Label; friend class SelectionDAG; - EHLabelSDNode(unsigned Order, DebugLoc dl, SDValue ch, MCSymbol *L) - : SDNode(ISD::EH_LABEL, Order, dl, getSDVTList(MVT::Other)), Label(L) { - InitOperands(&Chain, ch); - } + EHLabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L) + : SDNode(ISD::EH_LABEL, Order, dl, getSDVTList(MVT::Other)), Label(L) {} + public: MCSymbol *getLabel() const { return Label; } @@ -1892,12 +1742,11 @@ public: class CvtRndSatSDNode : public SDNode { ISD::CvtCode CvtCode; friend class SelectionDAG; - explicit CvtRndSatSDNode(EVT VT, unsigned Order, DebugLoc dl, - ArrayRef<SDValue> Ops, ISD::CvtCode Code) - : SDNode(ISD::CONVERT_RNDSAT, Order, dl, getSDVTList(VT), Ops), - CvtCode(Code) { - assert(Ops.size() == 5 && "wrong number of operations"); + explicit CvtRndSatSDNode(EVT VT, unsigned Order, const DebugLoc &dl, + ISD::CvtCode Code) + : SDNode(ISD::CONVERT_RNDSAT, Order, dl, getSDVTList(VT)), CvtCode(Code) { } + public: ISD::CvtCode getCvtCode() 
const { return CvtCode; } @@ -1926,24 +1775,13 @@ public: /// Base class for LoadSDNode and StoreSDNode class LSBaseSDNode : public MemSDNode { - //! Operand array for load and store - /*! - \note Moving this array to the base class captures more - common functionality shared between LoadSDNode and - StoreSDNode - */ - SDUse Ops[4]; public: - LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl, - SDValue *Operands, unsigned numOperands, + LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT, MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { SubclassData |= AM << 2; assert(getAddressingMode() == AM && "MemIndexedMode encoding error!"); - InitOperands(Ops, Operands, numOperands); - assert((getOffset().getOpcode() == ISD::UNDEF || isIndexed()) && - "Only indexed loads and stores have a non-undef offset operand"); } const SDValue &getOffset() const { @@ -1971,10 +1809,10 @@ public: /// This class is used to represent ISD::LOAD nodes. class LoadSDNode : public LSBaseSDNode { friend class SelectionDAG; - LoadSDNode(SDValue *ChainPtrOff, unsigned Order, DebugLoc dl, SDVTList VTs, + LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT, MachineMemOperand *MMO) - : LSBaseSDNode(ISD::LOAD, Order, dl, ChainPtrOff, 3, VTs, AM, MemVT, MMO) { + : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) { SubclassData |= (unsigned short)ETy; assert(getExtensionType() == ETy && "LoadExtType encoding error!"); assert(readMem() && "Load MachineMemOperand is not a load!"); @@ -1999,11 +1837,10 @@ public: /// This class is used to represent ISD::STORE nodes. 
class StoreSDNode : public LSBaseSDNode { friend class SelectionDAG; - StoreSDNode(SDValue *ChainValuePtrOff, unsigned Order, DebugLoc dl, - SDVTList VTs, ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT, + StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT, MachineMemOperand *MMO) - : LSBaseSDNode(ISD::STORE, Order, dl, ChainValuePtrOff, 4, - VTs, AM, MemVT, MMO) { + : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) { SubclassData |= (unsigned short)isTrunc; assert(isTruncatingStore() == isTrunc && "isTrunc encoding error!"); assert(!readMem() && "Store MachineMemOperand is a load!"); @@ -2027,16 +1864,12 @@ public: /// This base class is used to represent MLOAD and MSTORE nodes class MaskedLoadStoreSDNode : public MemSDNode { - // Operands - SDUse Ops[4]; public: friend class SelectionDAG; - MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl, - SDValue *Operands, unsigned numOperands, SDVTList VTs, - EVT MemVT, MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { - InitOperands(Ops, Operands, numOperands); - } + MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} // In the both nodes address is Op1, mask is Op2: // MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value @@ -2055,11 +1888,9 @@ public: class MaskedLoadSDNode : public MaskedLoadStoreSDNode { public: friend class SelectionDAG; - MaskedLoadSDNode(unsigned Order, DebugLoc dl, SDValue *Operands, - unsigned numOperands, SDVTList VTs, ISD::LoadExtType ETy, - EVT MemVT, MachineMemOperand *MMO) - : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands, - VTs, MemVT, MMO) { + MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + ISD::LoadExtType ETy, EVT MemVT, MachineMemOperand *MMO) + : MaskedLoadStoreSDNode(ISD::MLOAD, 
Order, dl, VTs, MemVT, MMO) { SubclassData |= (unsigned short)ETy; } @@ -2077,12 +1908,10 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode { public: friend class SelectionDAG; - MaskedStoreSDNode(unsigned Order, DebugLoc dl, SDValue *Operands, - unsigned numOperands, SDVTList VTs, bool isTrunc, EVT MemVT, - MachineMemOperand *MMO) - : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands, - VTs, MemVT, MMO) { - SubclassData |= (unsigned short)isTrunc; + MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + bool isTrunc, EVT MemVT, MachineMemOperand *MMO) + : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) { + SubclassData |= (unsigned short)isTrunc; } /// Return true if the op does a truncation before store. /// For integers this is the same as doing a TRUNCATE and storing the result. @@ -2100,17 +1929,12 @@ public: /// MGATHER and MSCATTER nodes /// class MaskedGatherScatterSDNode : public MemSDNode { - // Operands - SDUse Ops[5]; public: friend class SelectionDAG; - MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl, - ArrayRef<SDValue> Operands, SDVTList VTs, EVT MemVT, + MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { - assert(Operands.size() == 5 && "Incompatible number of operands"); - InitOperands(Ops, Operands.data(), Operands.size()); - } + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} // In the both nodes address is Op1, mask is Op2: // MaskedGatherSDNode (Chain, src0, mask, base, index), src0 is a passthru value @@ -2132,19 +1956,9 @@ public: class MaskedGatherSDNode : public MaskedGatherScatterSDNode { public: friend class SelectionDAG; - MaskedGatherSDNode(unsigned Order, DebugLoc dl, ArrayRef<SDValue> Operands, - SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) - : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, Operands, VTs, 
MemVT, - MMO) { - assert(getValue().getValueType() == getValueType(0) && - "Incompatible type of the PassThru value in MaskedGatherSDNode"); - assert(getMask().getValueType().getVectorNumElements() == - getValueType(0).getVectorNumElements() && - "Vector width mismatch between mask and data"); - assert(getIndex().getValueType().getVectorNumElements() == - getValueType(0).getVectorNumElements() && - "Vector width mismatch between index and data"); - } + MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + EVT MemVT, MachineMemOperand *MMO) + : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {} static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MGATHER; @@ -2157,17 +1971,9 @@ class MaskedScatterSDNode : public MaskedGatherScatterSDNode { public: friend class SelectionDAG; - MaskedScatterSDNode(unsigned Order, DebugLoc dl,ArrayRef<SDValue> Operands, - SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) - : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, Operands, VTs, MemVT, - MMO) { - assert(getMask().getValueType().getVectorNumElements() == - getValue().getValueType().getVectorNumElements() && - "Vector width mismatch between mask and data"); - assert(getIndex().getValueType().getVectorNumElements() == - getValue().getValueType().getVectorNumElements() && - "Vector width mismatch between index and data"); - } + MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + EVT MemVT, MachineMemOperand *MMO) + : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {} static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MSCATTER; @@ -2183,12 +1989,8 @@ public: private: friend class SelectionDAG; - MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc DL, SDVTList VTs) - : SDNode(Opc, Order, DL, VTs), MemRefs(nullptr), MemRefsEnd(nullptr) {} - - /// Operands for this instruction, if they fit here. If - /// they don't, this field is unused. 
- SDUse LocalOperands[4]; + MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs) + : SDNode(Opc, Order, DL, VTs), MemRefs(nullptr), MemRefsEnd(nullptr) {} /// Memory reference descriptions for this instruction. mmo_iterator MemRefs; @@ -2264,8 +2066,13 @@ template <> struct GraphTraits<SDNode*> { } }; -/// The largest SDNode class. -typedef MaskedGatherScatterSDNode LargestSDNode; +/// A representation of the largest SDNode, for use in sizeof(). +/// +/// This needs to be a union because the largest node differs on 32 bit systems +/// with 4 and 8 byte pointer alignment, respectively. +typedef AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode, + BlockAddressSDNode, GlobalAddressSDNode> + LargestSDNode; /// The SDNode class with the greatest alignment requirement. typedef GlobalAddressSDNode MostAlignedSDNode; diff --git a/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/include/llvm/CodeGen/SelectionDAGTargetInfo.h new file mode 100644 index 0000000000000..ac5092af8def3 --- /dev/null +++ b/include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -0,0 +1,156 @@ +//==-- llvm/CodeGen/SelectionDAGTargetInfo.h - SelectionDAG Info -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SelectionDAGTargetInfo class, which targets can +// subclass to parameterize the SelectionDAG lowering and instruction +// selection process. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SELECTIONDAGTARGETINFO_H +#define LLVM_CODEGEN_SELECTIONDAGTARGETINFO_H + +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/CodeGen.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// Targets can subclass this to parameterize the +/// SelectionDAG lowering and instruction selection process. +/// +class SelectionDAGTargetInfo { + SelectionDAGTargetInfo(const SelectionDAGTargetInfo &) = delete; + void operator=(const SelectionDAGTargetInfo &) = delete; + +public: + explicit SelectionDAGTargetInfo() = default; + virtual ~SelectionDAGTargetInfo(); + + /// Emit target-specific code that performs a memcpy. + /// This can be used by targets to provide code sequences for cases + /// that don't fit the target's parameters for simple loads/stores and can be + /// more efficient than using a library call. This function can return a null + /// SDValue if the target declines to use custom code and a different + /// lowering strategy should be used. + /// + /// If AlwaysInline is true, the size is constant and the target should not + /// emit any calls and is strongly encouraged to attempt to emit inline code + /// even if it is beyond the usual threshold because this intrinsic is being + /// expanded in a place where calls are not feasible (e.g. within the prologue + /// for another call). If the target chooses to decline an AlwaysInline + /// request here, legalize will resort to using simple loads and stores. + virtual SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Op1, + SDValue Op2, SDValue Op3, + unsigned Align, bool isVolatile, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + return SDValue(); + } + + /// Emit target-specific code that performs a memmove. 
+ /// This can be used by targets to provide code sequences for cases + /// that don't fit the target's parameters for simple loads/stores and can be + /// more efficient than using a library call. This function can return a null + /// SDValue if the target declines to use custom code and a different + /// lowering strategy should be used. + virtual SDValue EmitTargetCodeForMemmove( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, + SDValue Op2, SDValue Op3, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + return SDValue(); + } + + /// Emit target-specific code that performs a memset. + /// This can be used by targets to provide code sequences for cases + /// that don't fit the target's parameters for simple stores and can be more + /// efficient than using a library call. This function can return a null + /// SDValue if the target declines to use custom code and a different + /// lowering strategy should be used. + virtual SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Op1, + SDValue Op2, SDValue Op3, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + return SDValue(); + } + + /// Emit target-specific code that performs a memcmp, in cases where that is + /// faster than a libcall. The first returned SDValue is the result of the + /// memcmp and the second is the chain. Both SDValues can be null if a normal + /// libcall should be used. + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Op1, SDValue Op2, SDValue Op3, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + return std::make_pair(SDValue(), SDValue()); + } + + /// Emit target-specific code that performs a memchr, in cases where that is + /// faster than a libcall. The first returned SDValue is the result of the + /// memchr and the second is the chain. 
Both SDValues can be null if a normal + /// libcall should be used. + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForMemchr(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const { + return std::make_pair(SDValue(), SDValue()); + } + + /// Emit target-specific code that performs a strcpy or stpcpy, in cases + /// where that is faster than a libcall. + /// The first returned SDValue is the result of the copy (the start + /// of the destination string for strcpy, a pointer to the null terminator + /// for stpcpy) and the second is the chain. Both SDValues can be null + /// if a normal libcall should be used. + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForStrcpy(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, + SDValue Dest, SDValue Src, + MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { + return std::make_pair(SDValue(), SDValue()); + } + + /// Emit target-specific code that performs a strcmp, in cases where that is + /// faster than a libcall. + /// The first returned SDValue is the result of the strcmp and the second is + /// the chain. Both SDValues can be null if a normal libcall should be used. 
+ virtual std::pair<SDValue, SDValue> + EmitTargetCodeForStrcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Op1, SDValue Op2, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + return std::make_pair(SDValue(), SDValue()); + } + + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForStrlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, + SDValue Src, MachinePointerInfo SrcPtrInfo) const { + return std::make_pair(SDValue(), SDValue()); + } + + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForStrnlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, + SDValue Src, SDValue MaxLength, + MachinePointerInfo SrcPtrInfo) const { + return std::make_pair(SDValue(), SDValue()); + } + // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather + // than FMUL and ADD is delegated to the machine combiner. + virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const { + return false; + } +}; + +} // end llvm namespace + +#endif diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 7b621bee259ff..afb0288b024fb 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -66,7 +66,6 @@ namespace llvm { bool isPoisoned() const { return (reinterpret_cast<intptr_t>(mi) & 0x1) == 0x1; } #endif // EXPENSIVE_CHECKS - }; template <> @@ -99,7 +98,7 @@ namespace llvm { Slot_Block, /// Early-clobber register use/def slot. A live range defined at - /// Slot_EarlyCLobber interferes with normal live ranges killed at + /// Slot_EarlyClobber interferes with normal live ranges killed at /// Slot_Register. Also used as the kill slot for live ranges tied to an /// early-clobber def. Slot_EarlyClobber, @@ -213,6 +212,12 @@ namespace llvm { return A.listEntry()->getIndex() < B.listEntry()->getIndex(); } + /// Return true if A refers to the same instruction as B or an earlier one. + /// This is equivalent to !isEarlierInstr(B, A). 
+ static bool isEarlierEqualInstr(SlotIndex A, SlotIndex B) { + return !isEarlierInstr(B, A); + } + /// Return the distance from this index to the given one. int distance(SlotIndex other) const { return other.getIndex() - getIndex(); @@ -302,7 +307,6 @@ namespace llvm { SlotIndex getPrevIndex() const { return SlotIndex(&*--listEntry()->getIterator(), getSlot()); } - }; template <> struct isPodLike<SlotIndex> { static const bool value = true; }; @@ -376,7 +380,7 @@ namespace llvm { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); } - ~SlotIndexes() { + ~SlotIndexes() override { // The indexList's nodes are all allocated in the BumpPtrAllocator. indexList.clearAndLeakNodesUnsafely(); } @@ -410,14 +414,14 @@ namespace llvm { /// Returns true if the given machine instr is mapped to an index, /// otherwise returns false. - bool hasIndex(const MachineInstr *instr) const { - return mi2iMap.count(instr); + bool hasIndex(const MachineInstr &instr) const { + return mi2iMap.count(&instr); } /// Returns the base index for the given instruction. - SlotIndex getInstructionIndex(const MachineInstr *MI) const { + SlotIndex getInstructionIndex(const MachineInstr &MI) const { // Instructions inside a bundle have the same number as the bundle itself. - Mi2IndexMap::const_iterator itr = mi2iMap.find(getBundleStart(MI)); + Mi2IndexMap::const_iterator itr = mi2iMap.find(&getBundleStart(MI)); assert(itr != mi2iMap.end() && "Instruction not found in maps."); return itr->second; } @@ -443,15 +447,15 @@ namespace llvm { /// getIndexBefore - Returns the index of the last indexed instruction /// before MI, or the start index of its basic block. /// MI is not required to have an index. 
- SlotIndex getIndexBefore(const MachineInstr *MI) const { - const MachineBasicBlock *MBB = MI->getParent(); + SlotIndex getIndexBefore(const MachineInstr &MI) const { + const MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "MI must be inserted inna basic block"); MachineBasicBlock::const_iterator I = MI, B = MBB->begin(); for (;;) { if (I == B) return getMBBStartIdx(MBB); --I; - Mi2IndexMap::const_iterator MapItr = mi2iMap.find(I); + Mi2IndexMap::const_iterator MapItr = mi2iMap.find(&*I); if (MapItr != mi2iMap.end()) return MapItr->second; } @@ -460,15 +464,15 @@ namespace llvm { /// getIndexAfter - Returns the index of the first indexed instruction /// after MI, or the end index of its basic block. /// MI is not required to have an index. - SlotIndex getIndexAfter(const MachineInstr *MI) const { - const MachineBasicBlock *MBB = MI->getParent(); + SlotIndex getIndexAfter(const MachineInstr &MI) const { + const MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "MI must be inserted inna basic block"); MachineBasicBlock::const_iterator I = MI, E = MBB->end(); for (;;) { ++I; if (I == E) return getMBBEndIdx(MBB); - Mi2IndexMap::const_iterator MapItr = mi2iMap.find(I); + Mi2IndexMap::const_iterator MapItr = mi2iMap.find(&*I); if (MapItr != mi2iMap.end()) return MapItr->second; } @@ -573,25 +577,25 @@ namespace llvm { /// If Late is set and there are null indexes between mi's neighboring /// instructions, create the new index after the null indexes instead of /// before them. 
- SlotIndex insertMachineInstrInMaps(MachineInstr *mi, bool Late = false) { - assert(!mi->isInsideBundle() && + SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late = false) { + assert(!MI.isInsideBundle() && "Instructions inside bundles should use bundle start's slot."); - assert(mi2iMap.find(mi) == mi2iMap.end() && "Instr already indexed."); + assert(mi2iMap.find(&MI) == mi2iMap.end() && "Instr already indexed."); // Numbering DBG_VALUE instructions could cause code generation to be // affected by debug information. - assert(!mi->isDebugValue() && "Cannot number DBG_VALUE instructions."); + assert(!MI.isDebugValue() && "Cannot number DBG_VALUE instructions."); - assert(mi->getParent() != nullptr && "Instr must be added to function."); + assert(MI.getParent() != nullptr && "Instr must be added to function."); - // Get the entries where mi should be inserted. + // Get the entries where MI should be inserted. IndexList::iterator prevItr, nextItr; if (Late) { - // Insert mi's index immediately before the following instruction. - nextItr = getIndexAfter(mi).listEntry()->getIterator(); + // Insert MI's index immediately before the following instruction. + nextItr = getIndexAfter(MI).listEntry()->getIterator(); prevItr = std::prev(nextItr); } else { - // Insert mi's index immediately after the preceding instruction. - prevItr = getIndexBefore(mi).listEntry()->getIterator(); + // Insert MI's index immediately after the preceding instruction. + prevItr = getIndexBefore(MI).listEntry()->getIterator(); nextItr = std::next(prevItr); } @@ -600,27 +604,27 @@ namespace llvm { unsigned dist = ((nextItr->getIndex() - prevItr->getIndex())/2) & ~3u; unsigned newNumber = prevItr->getIndex() + dist; - // Insert a new list entry for mi. + // Insert a new list entry for MI. IndexList::iterator newItr = - indexList.insert(nextItr, createEntry(mi, newNumber)); + indexList.insert(nextItr, createEntry(&MI, newNumber)); // Renumber locally if we need to. 
if (dist == 0) renumberIndexes(newItr); SlotIndex newIndex(&*newItr, SlotIndex::Slot_Block); - mi2iMap.insert(std::make_pair(mi, newIndex)); + mi2iMap.insert(std::make_pair(&MI, newIndex)); return newIndex; } /// Remove the given machine instruction from the mapping. - void removeMachineInstrFromMaps(MachineInstr *mi) { + void removeMachineInstrFromMaps(MachineInstr &MI) { // remove index -> MachineInstr and // MachineInstr -> index mappings - Mi2IndexMap::iterator mi2iItr = mi2iMap.find(mi); + Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI); if (mi2iItr != mi2iMap.end()) { IndexListEntry *miEntry(mi2iItr->second.listEntry()); - assert(miEntry->getInstr() == mi && "Instruction indexes broken."); + assert(miEntry->getInstr() == &MI && "Instruction indexes broken."); // FIXME: Eventually we want to actually delete these indexes. miEntry->setInstr(nullptr); mi2iMap.erase(mi2iItr); @@ -629,17 +633,17 @@ namespace llvm { /// ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in /// maps used by register allocator. - void replaceMachineInstrInMaps(MachineInstr *mi, MachineInstr *newMI) { - Mi2IndexMap::iterator mi2iItr = mi2iMap.find(mi); + void replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI) { + Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI); if (mi2iItr == mi2iMap.end()) return; SlotIndex replaceBaseIndex = mi2iItr->second; IndexListEntry *miEntry(replaceBaseIndex.listEntry()); - assert(miEntry->getInstr() == mi && + assert(miEntry->getInstr() == &MI && "Mismatched instruction in index tables."); - miEntry->setInstr(newMI); + miEntry->setInstr(&NewMI); mi2iMap.erase(mi2iItr); - mi2iMap.insert(std::make_pair(newMI, replaceBaseIndex)); + mi2iMap.insert(std::make_pair(&NewMI, replaceBaseIndex)); } /// Add the given MachineBasicBlock into the maps. @@ -703,15 +707,13 @@ namespace llvm { indexList.erase(entry); #endif } - }; - // Specialize IntervalMapInfo for half-open slot index intervals. 
template <> struct IntervalMapInfo<SlotIndex> : IntervalMapHalfOpenInfo<SlotIndex> { }; -} +} // end namespace llvm #endif // LLVM_CODEGEN_SLOTINDEXES_H diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index 972a616ad779a..918848f6b2a13 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -14,8 +14,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Debug.h" -#include <map> #include <vector> namespace llvm { diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h index 8cef85cb44852..1b3c0eb4a4d0a 100644 --- a/include/llvm/CodeGen/StackProtector.h +++ b/include/llvm/CodeGen/StackProtector.h @@ -75,6 +75,12 @@ private: /// times. SmallPtrSet<const PHINode *, 16> VisitedPHIs; + // A prologue is generated. + bool HasPrologue = false; + + // IR checking code is generated. + bool HasIRCheck = false; + /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. /// @@ -120,6 +126,10 @@ public: } SSPLayoutKind getSSPLayout(const AllocaInst *AI) const; + + // Return true if StackProtector is supposed to be handled by SelectionDAG. + bool shouldEmitSDCheck(const BasicBlock &BB) const; + void adjustForColoring(const AllocaInst *From, const AllocaInst *To); bool runOnFunction(Function &Fn) override; diff --git a/include/llvm/CodeGen/TailDuplicator.h b/include/llvm/CodeGen/TailDuplicator.h new file mode 100644 index 0000000000000..8e65199418a6b --- /dev/null +++ b/include/llvm/CodeGen/TailDuplicator.h @@ -0,0 +1,92 @@ +//===-- llvm/CodeGen/TailDuplicator.h ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the TailDuplicator class. Used by the +// TailDuplication pass, and MachineBlockPlacement. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TAILDUPLICATOR_H +#define LLVM_CODEGEN_TAILDUPLICATOR_H + +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +namespace llvm { + +/// Utility class to perform tail duplication. +class TailDuplicator { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MachineBranchProbabilityInfo *MBPI; + const MachineModuleInfo *MMI; + MachineRegisterInfo *MRI; + bool PreRegAlloc; + + // A list of virtual registers for which to update SSA form. + SmallVector<unsigned, 16> SSAUpdateVRs; + + // For each virtual register in SSAUpdateVals keep a list of source virtual + // registers. 
+ typedef std::vector<std::pair<MachineBasicBlock *, unsigned>> AvailableValsTy; + + DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; + +public: + void initMF(MachineFunction &MF, const MachineModuleInfo *MMI, + const MachineBranchProbabilityInfo *MBPI); + bool tailDuplicateBlocks(MachineFunction &MF); + static bool isSimpleBB(MachineBasicBlock *TailBB); + bool shouldTailDuplicate(const MachineFunction &MF, bool IsSimple, + MachineBasicBlock &TailBB); + bool tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, + MachineBasicBlock *MBB); + +private: + typedef TargetInstrInfo::RegSubRegPair RegSubRegPair; + + void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB); + void processPHI(MachineInstr *MI, MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, RegSubRegPair> &LocalVRMap, + SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies, + const DenseSet<unsigned> &UsedByPhi, bool Remove); + void duplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, MachineFunction &MF, + DenseMap<unsigned, RegSubRegPair> &LocalVRMap, + const DenseSet<unsigned> &UsedByPhi); + void updateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallSetVector<MachineBasicBlock *, 8> &Succs); + bool canCompletelyDuplicateBB(MachineBasicBlock &BB); + bool duplicateSimpleBB(MachineBasicBlock *TailBB, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + const DenseSet<unsigned> &RegsUsedByPhi, + SmallVectorImpl<MachineInstr *> &Copies); + bool tailDuplicate(MachineFunction &MF, bool IsSimple, + MachineBasicBlock *TailBB, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies); + void appendCopies(MachineBasicBlock *MBB, + SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos, + SmallVectorImpl<MachineInstr *> &Copies); + + void removeDeadBlock(MachineBasicBlock *MBB); +}; + +} // End llvm namespace + 
+#endif diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 2f1379131cbdc..c856435f5ddca 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -15,7 +15,7 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H -#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/SectionKind.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -23,7 +23,6 @@ namespace llvm { class MachineModuleInfo; class Mangler; class MCAsmInfo; - class MCExpr; class MCSection; class MCSectionMachO; class MCSymbol; @@ -36,6 +35,10 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { bool UseInitArray; mutable unsigned NextUniqueID = 0; +protected: + MCSymbolRefExpr::VariantKind PLTRelativeVariantKind = + MCSymbolRefExpr::VK_None; + public: TargetLoweringObjectFileELF() : UseInitArray(false) {} @@ -47,7 +50,8 @@ public: /// Given a constant with the SectionKind, return a section that it should be /// placed in. 
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, - const Constant *C) const override; + const Constant *C, + unsigned &Align) const override; MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, @@ -81,6 +85,10 @@ public: const MCSymbol *KeySym) const override; MCSection *getStaticDtorSection(unsigned Priority, const MCSymbol *KeySym) const override; + + const MCExpr *lowerRelativeReference(const GlobalValue *LHS, + const GlobalValue *RHS, Mangler &Mang, + const TargetMachine &TM) const override; }; @@ -104,7 +112,8 @@ public: const TargetMachine &TM) const override; MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, - const Constant *C) const override; + const Constant *C, + unsigned &Align) const override; /// The mach-o version of this method defaults to returning a stub reference. const MCExpr * @@ -131,6 +140,8 @@ public: class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile { + mutable unsigned NextUniqueID = 0; + public: ~TargetLoweringObjectFileCOFF() override {} diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h new file mode 100644 index 0000000000000..9309655a972ed --- /dev/null +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -0,0 +1,376 @@ +//===-- TargetPassConfig.h - Code Generation pass options -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// Target-Independent Code Generator Pass Configuration Options pass. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETPASSCONFIG_H +#define LLVM_CODEGEN_TARGETPASSCONFIG_H + +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" +#include <string> + +namespace llvm { + +class PassConfigImpl; +class ScheduleDAGInstrs; +class TargetMachine; +struct MachineSchedContext; + +// The old pass manager infrastructure is hidden in a legacy namespace now. +namespace legacy { +class PassManagerBase; +} +using legacy::PassManagerBase; + +/// Discriminated union of Pass ID types. +/// +/// The PassConfig API prefers dealing with IDs because they are safer and more +/// efficient. IDs decouple configuration from instantiation. This way, when a +/// pass is overriden, it isn't unnecessarily instantiated. It is also unsafe to +/// refer to a Pass pointer after adding it to a pass manager, which deletes +/// redundant pass instances. +/// +/// However, it is convient to directly instantiate target passes with +/// non-default ctors. These often don't have a registered PassInfo. Rather than +/// force all target passes to implement the pass registry boilerplate, allow +/// the PassConfig API to handle either type. +/// +/// AnalysisID is sadly char*, so PointerIntPair won't work. 
+class IdentifyingPassPtr { + union { + AnalysisID ID; + Pass *P; + }; + bool IsInstance; +public: + IdentifyingPassPtr() : P(nullptr), IsInstance(false) {} + IdentifyingPassPtr(AnalysisID IDPtr) : ID(IDPtr), IsInstance(false) {} + IdentifyingPassPtr(Pass *InstancePtr) : P(InstancePtr), IsInstance(true) {} + + bool isValid() const { return P; } + bool isInstance() const { return IsInstance; } + + AnalysisID getID() const { + assert(!IsInstance && "Not a Pass ID"); + return ID; + } + Pass *getInstance() const { + assert(IsInstance && "Not a Pass Instance"); + return P; + } +}; + +template <> struct isPodLike<IdentifyingPassPtr> { + static const bool value = true; +}; + +/// Target-Independent Code Generator Pass Configuration Options. +/// +/// This is an ImmutablePass solely for the purpose of exposing CodeGen options +/// to the internals of other CodeGen passes. +class TargetPassConfig : public ImmutablePass { +public: + /// Pseudo Pass IDs. These are defined within TargetPassConfig because they + /// are unregistered pass IDs. They are only useful for use with + /// TargetPassConfig APIs to identify multiple occurrences of the same pass. + /// + + /// EarlyTailDuplicate - A clone of the TailDuplicate pass that runs early + /// during codegen, on SSA form. + static char EarlyTailDuplicateID; + + /// PostRAMachineLICM - A clone of the LICM pass that runs during late machine + /// optimization after regalloc. + static char PostRAMachineLICMID; + +private: + PassManagerBase *PM; + AnalysisID StartBefore, StartAfter; + AnalysisID StopAfter; + bool Started; + bool Stopped; + bool AddingMachinePasses; + +protected: + TargetMachine *TM; + PassConfigImpl *Impl; // Internal data structures + bool Initialized; // Flagged after all passes are configured. + + // Target Pass Options + // Targets provide a default setting, user flags override. + // + bool DisableVerify; + + /// Default setting for -enable-tail-merge on this target. 
+ bool EnableTailMerge; + +public: + TargetPassConfig(TargetMachine *tm, PassManagerBase &pm); + // Dummy constructor. + TargetPassConfig(); + + ~TargetPassConfig() override; + + static char ID; + + /// Get the right type of TargetMachine for this target. + template<typename TMC> TMC &getTM() const { + return *static_cast<TMC*>(TM); + } + + // + void setInitialized() { Initialized = true; } + + CodeGenOpt::Level getOptLevel() const; + + /// Set the StartAfter, StartBefore and StopAfter passes to allow running only + /// a portion of the normal code-gen pass sequence. + /// + /// If the StartAfter and StartBefore pass ID is zero, then compilation will + /// begin at the normal point; otherwise, clear the Started flag to indicate + /// that passes should not be added until the starting pass is seen. If the + /// Stop pass ID is zero, then compilation will continue to the end. + /// + /// This function expects that at least one of the StartAfter or the + /// StartBefore pass IDs is null. + void setStartStopPasses(AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopAfter) { + if (StartAfter) + assert(!StartBefore && "Start after and start before passes are given"); + this->StartBefore = StartBefore; + this->StartAfter = StartAfter; + this->StopAfter = StopAfter; + Started = (StartAfter == nullptr) && (StartBefore == nullptr); + } + + void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); } + + bool getEnableTailMerge() const { return EnableTailMerge; } + void setEnableTailMerge(bool Enable) { setOpt(EnableTailMerge, Enable); } + + /// Allow the target to override a specific pass without overriding the pass + /// pipeline. When passes are added to the standard pipeline at the + /// point where StandardID is expected, add TargetID in its place. + void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID); + + /// Insert InsertedPassID pass after TargetPassID pass. 
+ void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, + bool VerifyAfter = true, bool PrintAfter = true); + + /// Allow the target to enable a specific standard pass by default. + void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); } + + /// Allow the target to disable a specific standard pass by default. + void disablePass(AnalysisID PassID) { + substitutePass(PassID, IdentifyingPassPtr()); + } + + /// Return the pass substituted for StandardID by the target. + /// If no substitution exists, return StandardID. + IdentifyingPassPtr getPassSubstitution(AnalysisID StandardID) const; + + /// Return true if the pass has been substituted by the target or + /// overridden on the command line. + bool isPassSubstitutedOrOverridden(AnalysisID ID) const; + + /// Return true if the optimized regalloc pipeline is enabled. + bool getOptimizeRegAlloc() const; + + /// Return true if shrink wrapping is enabled. + bool getEnableShrinkWrap() const; + + /// Return true if the default global register allocator is in use and + /// has not been overridden on the command line with '-regalloc=...' + bool usingDefaultRegAlloc() const; + + /// Add common target configurable passes that perform LLVM IR to IR + /// transforms following machine independent optimization. + virtual void addIRPasses(); + + /// Add passes to lower exception handling for the code generator. + void addPassesToHandleExceptions(); + + /// Add pass to prepare the LLVM IR for code generation. This should be done + /// before exception handling preparation passes. + virtual void addCodeGenPrepare(); + + /// Add common passes that perform LLVM IR to IR transforms in preparation for + /// instruction selection. + virtual void addISelPrepare(); + + /// addInstSelector - This method should install an instruction selector pass, + /// which converts from LLVM code to machine instructions. 
+ virtual bool addInstSelector() { + return true; + } + + /// This method should install an IR translator pass, which converts from + /// LLVM code to machine instructions with possibly generic opcodes. + virtual bool addIRTranslator() { return true; } + + /// This method may be implemented by targets that want to run passes + /// immediately before the register bank selection. + virtual void addPreRegBankSelect() {} + + /// This method should install a register bank selector pass, which + /// assigns register banks to virtual registers without a register + /// class or register banks. + virtual bool addRegBankSelect() { return true; } + + /// Add the complete, standard set of LLVM CodeGen passes. + /// Fully developed targets will not generally override this. + virtual void addMachinePasses(); + + /// Create an instance of ScheduleDAGInstrs to be run within the standard + /// MachineScheduler pass for this function and target at the current + /// optimization level. + /// + /// This can also be used to plug a new MachineSchedStrategy into an instance + /// of the standard ScheduleDAGMI: + /// return new ScheduleDAGMI(C, make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false) + /// + /// Return NULL to select the default (generic) machine scheduler. + virtual ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const { + return nullptr; + } + + /// Similar to createMachineScheduler but used when postRA machine scheduling + /// is enabled. + virtual ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const { + return nullptr; + } + + /// printAndVerify - Add a pass to dump then verify the machine function, if + /// those steps are enabled. + /// + void printAndVerify(const std::string &Banner); + + /// Add a pass to print the machine function if printing is enabled. + void addPrintPass(const std::string &Banner); + + /// Add a pass to perform basic verification of the machine function if + /// verification is enabled. 
+ void addVerifyPass(const std::string &amp;Banner); + +protected: + // Helper to verify the analysis is really immutable. + void setOpt(bool &amp;Opt, bool Val); + + /// Methods with trivial inline returns are convenient points in the common + /// codegen pass pipeline where targets may insert passes. Methods with + /// out-of-line standard implementations are major CodeGen stages called by + /// addMachinePasses. Some targets may override major stages when inserting + /// passes is insufficient, but maintaining overridden stages is more work. + /// + + /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM + /// passes (which are run just before instruction selector). + virtual bool addPreISel() { + return true; + } + + /// addMachineSSAOptimization - Add standard passes that optimize machine + /// instructions in SSA form. + virtual void addMachineSSAOptimization(); + + /// Add passes that optimize instruction level parallelism for out-of-order + /// targets. These passes are run while the machine code is still in SSA + /// form, so they can use MachineTraceMetrics to control their heuristics. + /// + /// All passes added here should preserve the MachineDominatorTree, + /// MachineLoopInfo, and MachineTraceMetrics analyses. + virtual bool addILPOpts() { + return false; + } + + /// This method may be implemented by targets that want to run passes + /// immediately before register allocation. + virtual void addPreRegAlloc() { } + + /// createTargetRegisterAllocator - Create the register allocator pass for + /// this target at the current optimization level. + virtual FunctionPass *createTargetRegisterAllocator(bool Optimized); + + /// addFastRegAlloc - Add the minimum set of target-independent passes that + /// are required for fast register allocation. + virtual void addFastRegAlloc(FunctionPass *RegAllocPass); + + /// addOptimizedRegAlloc - Add passes related to register allocation. 
+ /// LLVMTargetMachine provides standard regalloc passes for most targets. + virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass); + + /// addPreRewrite - Add passes to the optimized register allocation pipeline + /// after register allocation is complete, but before virtual registers are + /// rewritten to physical registers. + /// + /// These passes must preserve VirtRegMap and LiveIntervals, and when running + /// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix. + /// When these passes run, VirtRegMap contains legal physreg assignments for + /// all virtual registers. + virtual bool addPreRewrite() { + return false; + } + + /// This method may be implemented by targets that want to run passes after + /// register allocation pass pipeline but before prolog-epilog insertion. + virtual void addPostRegAlloc() { } + + /// Add passes that optimize machine instructions after register allocation. + virtual void addMachineLateOptimization(); + + /// This method may be implemented by targets that want to run passes after + /// prolog-epilog insertion and before the second instruction scheduling pass. + virtual void addPreSched2() { } + + /// addGCPasses - Add late codegen passes that analyze code for garbage + /// collection. This should return true if GC info should be printed after + /// these passes. + virtual bool addGCPasses(); + + /// Add standard basic block placement passes. + virtual void addBlockPlacement(); + + /// This pass may be implemented by targets that want to run passes + /// immediately before machine code is emitted. + virtual void addPreEmitPass() { } + + /// Utilities for targets to add passes to the pass manager. + /// + + /// Add a CodeGen pass at this point in the pipeline after checking overrides. + /// Return the pass that was added, or zero if no pass was added. 
+ /// @p printAfter if true and adding a machine function pass add an extra + /// machine printer pass afterwards + /// @p verifyAfter if true and adding a machine function pass add an extra + /// machine verification pass afterwards. + AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true, + bool printAfter = true); + + /// Add a pass to the PassManager if that pass is supposed to be run, as + /// determined by the StartAfter and StopAfter options. Takes ownership of the + /// pass. + /// @p printAfter if true and adding a machine function pass add an extra + /// machine printer pass afterwards + /// @p verifyAfter if true and adding a machine function pass add an extra + /// machine verification pass afterwards. + void addPass(Pass *P, bool verifyAfter = true, bool printAfter = true); + + /// addMachinePasses helper to create the target-selected or overridden + /// regalloc pass. + FunctionPass *createRegAllocPass(bool Optimized); +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/CodeGen/UnreachableBlockElim.h b/include/llvm/CodeGen/UnreachableBlockElim.h new file mode 100644 index 0000000000000..3e7afd4cd4337 --- /dev/null +++ b/include/llvm/CodeGen/UnreachableBlockElim.h @@ -0,0 +1,37 @@ +//===-- UnreachableBlockElim.h - Remove unreachable blocks for codegen --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is an extremely simple version of the SimplifyCFG pass. Its sole +// job is to delete LLVM basic blocks that are not reachable from the entry +// node. To do this, it performs a simple depth first traversal of the CFG, +// then deletes any unvisited nodes. +// +// Note that this pass is really a hack. 
In particular, the instruction +// selectors for various targets should just not generate code for unreachable +// blocks. Until LLVM has a more systematic way of defining instruction +// selectors, however, we cannot really expect them to handle additional +// complexity. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_UNREACHABLEBLOCKELIM_H +#define LLVM_LIB_CODEGEN_UNREACHABLEBLOCKELIM_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class UnreachableBlockElimPass + : public PassInfoMixin<UnreachableBlockElimPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_UNREACHABLEBLOCKELIM_H diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index 929eb88a0393d..524a90803df89 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -124,6 +124,11 @@ namespace llvm { return isSimple() ? V.isInteger() : isExtendedInteger(); } + /// isScalarInteger - Return true if this is an integer, but not a vector. + bool isScalarInteger() const { + return isSimple() ? V.isScalarInteger() : isExtendedScalarInteger(); + } + /// isVector - Return true if this is a vector value type. bool isVector() const { return isSimple() ? 
V.isVector() : isExtendedVector(); @@ -367,6 +372,7 @@ namespace llvm { unsigned NumElements); bool isExtendedFloatingPoint() const LLVM_READONLY; bool isExtendedInteger() const LLVM_READONLY; + bool isExtendedScalarInteger() const LLVM_READONLY; bool isExtendedVector() const LLVM_READONLY; bool isExtended16BitVector() const LLVM_READONLY; bool isExtended32BitVector() const LLVM_READONLY; @@ -378,7 +384,7 @@ namespace llvm { bool isExtended2048BitVector() const LLVM_READONLY; EVT getExtendedVectorElementType() const; unsigned getExtendedVectorNumElements() const LLVM_READONLY; - unsigned getExtendedSizeInBits() const; + unsigned getExtendedSizeInBits() const LLVM_READONLY; }; } // End llvm namespace diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index f29ec42714e8d..f7b1661d7451b 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -39,8 +39,8 @@ def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value def v16i1 : ValueType<16, 16>; // 16 x i1 vector value def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value -def v512i1 : ValueType<512, 19>; // 512 x i8 vector value -def v1024i1: ValueType<1024,20>; //1024 x i8 vector value +def v512i1 : ValueType<512, 19>; // 512 x i1 vector value +def v1024i1: ValueType<1024,20>; //1024 x i1 vector value def v1i8 : ValueType<16, 21>; // 1 x i8 vector value def v2i8 : ValueType<16 , 22>; // 2 x i8 vector value @@ -96,24 +96,24 @@ def x86mmx : ValueType<64 , 64>; // X86 MMX value def FlagVT : ValueType<0 , 65>; // Pre-RA sched glue def isVoid : ValueType<0 , 66>; // Produces no value def untyped: ValueType<8 , 67>; // Produces an untyped value -def token : ValueType<0 , 249>; // TokenTy -def MetadataVT: ValueType<0, 250>; // Metadata +def token : ValueType<0 , 120>; // TokenTy +def MetadataVT: ValueType<0, 121>; // Metadata // Pseudo valuetype mapped to the current pointer size to any address space. 
// Should only be used in TableGen. -def iPTRAny : ValueType<0, 251>; +def iPTRAny : ValueType<0, 122>; // Pseudo valuetype to represent "vector of any size" -def vAny : ValueType<0 , 252>; +def vAny : ValueType<0 , 123>; // Pseudo valuetype to represent "float of any format" -def fAny : ValueType<0 , 253>; +def fAny : ValueType<0 , 124>; // Pseudo valuetype to represent "integer of any bit width" -def iAny : ValueType<0 , 254>; +def iAny : ValueType<0 , 125>; // Pseudo valuetype mapped to the current pointer size. -def iPTR : ValueType<0 , 255>; +def iPTR : ValueType<0 , 126>; // Pseudo valuetype to represent "any type of any size". -def Any : ValueType<0 , 256>; +def Any : ValueType<0 , 127>; diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h index 46c1029f62cf3..dd730495a5f61 100644 --- a/include/llvm/CodeGen/WinEHFuncInfo.h +++ b/include/llvm/CodeGen/WinEHFuncInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" +#include "llvm/IR/Instructions.h" namespace llvm { class AllocaInst; @@ -108,6 +109,7 @@ struct WinEHFuncInfo { int EHRegNodeFrameIndex = INT_MAX; int EHRegNodeEndOffset = INT_MAX; + int EHGuardFrameIndex = INT_MAX; int SEHSetFrameOffset = INT_MAX; WinEHFuncInfo(); |